Browse Source

MDEV-36780: InnoDB buffer pool reserves all assigned memory

In commit b6923420f3 (MDEV-29445)
we started to specify the MAP_POPULATE flag for allocating the
InnoDB buffer pool. This would cause a lot of time to be spent
on __mm_populate() inside the Linux kernel, such as 16 seconds
to pre-fault or commit innodb_buffer_pool_size=64G.

Let us revert to the previous way of allocating the buffer pool
at startup. Note: An attempt to increase the buffer pool size by
SET GLOBAL innodb_buffer_pool_size (up to innodb_buffer_pool_size_max)
will invoke my_virtual_mem_commit(), which will use MAP_POPULATE
to zero-fill and prefault the requested additional memory area while
holding buf_pool.mutex, blocking other threads on that mutex.

Before MDEV-29445 we allocated the InnoDB buffer pool by invoking
mmap(2) once (via my_large_malloc()). After the change, we would
invoke mmap(2) twice, first via my_virtual_mem_reserve() and then
via my_virtual_mem_commit(). Outside Microsoft Windows, we are
reverting back to my_large_malloc() like allocation.

my_virtual_mem_reserve(): Define only for Microsoft Windows.
Other platforms should invoke my_large_virtual_alloc() and
update_malloc_size() instead of my_virtual_mem_reserve() and
my_virtual_mem_commit().

my_large_virtual_alloc(): Define only outside Microsoft Windows.
Do not specify MAP_NORESERVE nor MAP_POPULATE, to preserve compatibility
with my_large_malloc(). Were MAP_POPULATE specified, the mmap()
system call would be significantly slower, for example 18 seconds
to reserve 64 GiB upfront.
pull/4042/head
Marko Mäkelä 5 months ago
parent
commit
56e0be34bc
  1. 4
      include/my_sys.h
  2. 2
      include/my_virtual_mem.h
  3. 46
      mysys/my_largepage.c
  4. 12
      mysys/my_virtual_mem.c
  5. 8
      storage/innobase/buf/buf0buf.cc

4
include/my_sys.h

@@ -177,7 +177,9 @@ extern my_bool my_use_large_pages;
int my_init_large_pages(void);
uchar *my_large_malloc(size_t *size, myf my_flags);
#if defined _WIN32 || defined HAVE_MMAP
#ifdef _WIN32
/* On Windows, use my_virtual_mem_reserve() and my_virtual_mem_commit(). */
#else
char *my_large_virtual_alloc(size_t *size);
#endif
void my_large_free(void *ptr, size_t size);

2
include/my_virtual_mem.h

@@ -24,7 +24,9 @@
extern "C" {
#endif
# ifdef _WIN32
char *my_virtual_mem_reserve(size_t *size);
# endif
char *my_virtual_mem_commit(char *ptr, size_t size);
void my_virtual_mem_decommit(char *ptr, size_t size);
void my_virtual_mem_release(char *ptr, size_t size);

46
mysys/my_largepage.c

@@ -423,7 +423,7 @@ uchar *my_large_malloc(size_t *size, myf my_flags)
DBUG_RETURN(ptr);
}
#ifdef _WIN32
#ifndef _WIN32
/**
Special large pages allocator, with possibility to commit to allocating
more memory later.
@@ -434,37 +434,10 @@ char *my_large_virtual_alloc(size_t *size)
char *ptr;
DBUG_ENTER("my_large_virtual_alloc");
if (my_use_large_pages)
{
size_t s= *size;
s= MY_ALIGN(s, (size_t) my_large_page_size);
ptr= VirtualAlloc(NULL, s, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES,
PAGE_READWRITE);
if (ptr)
{
*size= s;
DBUG_RETURN(ptr);
}
}
DBUG_RETURN(VirtualAlloc(NULL, *size, MEM_RESERVE, PAGE_READWRITE));
}
#elif defined HAVE_MMAP
/**
Special large pages allocator, with possibility to commit to allocating
more memory later.
Every implementation returns a zero filled buffer here.
*/
char *my_large_mmap(size_t *size, int prot)
{
char *ptr;
DBUG_ENTER("my_large_virtual_alloc");
if (my_use_large_pages)
{
size_t large_page_size;
int page_i= 0;
prot= PROT_READ | PROT_WRITE;
while ((large_page_size= my_next_large_page_size(*size, &page_i)) != 0)
{
@@ -488,7 +461,7 @@ char *my_large_mmap(size_t *size, int prot)
OS_MAP_ANON;
size_t aligned_size= MY_ALIGN(*size, (size_t) large_page_size);
ptr= mmap(NULL, aligned_size, prot, mapflag, -1, 0);
ptr= mmap(NULL, aligned_size, PROT_READ | PROT_WRITE, mapflag, -1, 0);
if (ptr == (void*) -1)
{
ptr= NULL;
@@ -511,10 +484,7 @@ char *my_large_mmap(size_t *size, int prot)
}
}
ptr= mmap(NULL, *size, prot,
# ifdef MAP_NORESERVE
MAP_NORESERVE |
# endif
ptr= mmap(NULL, *size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | OS_MAP_ANON, -1, 0);
if (ptr == MAP_FAILED)
{
@@ -524,16 +494,6 @@ char *my_large_mmap(size_t *size, int prot)
DBUG_RETURN(ptr);
}
/**
Special large pages allocator, with possibility to commit to allocating
more memory later.
Every implementation returns a zero filled buffer here.
*/
char *my_large_virtual_alloc(size_t *size)
{
return my_large_mmap(size, PROT_READ | PROT_WRITE);
}
#endif
/**

12
mysys/my_virtual_mem.c

@@ -34,13 +34,9 @@
We try to respect use_large_pages setting, on Windows and Linux
*/
#ifndef _WIN32
char *my_large_mmap(size_t *size, int prot);
#endif
#ifdef _WIN32
char *my_virtual_mem_reserve(size_t *size)
{
#ifdef _WIN32
DWORD flags= my_use_large_pages
? MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT
: MEM_RESERVE;
@@ -53,10 +49,8 @@ char *my_virtual_mem_reserve(size_t *size)
my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), *size);
}
return ptr;
#else
return my_large_mmap(size, PROT_NONE);
#endif
}
#endif
#if defined _WIN32 && !defined DBUG_OFF
static my_bool is_memory_committed(char *ptr, size_t size)
@@ -88,7 +82,7 @@ char *my_virtual_mem_commit(char *ptr, size_t size)
}
#else
if (my_use_large_pages)
/* my_large_mmap() already created a read/write mapping. */;
/* my_large_virtual_alloc() already created a read/write mapping. */;
else
{
# ifdef _AIX

8
storage/innobase/buf/buf0buf.cc

@@ -1336,7 +1336,11 @@ bool buf_pool_t::create() noexcept
retry:
{
NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE;
#ifdef _WIN32
memory_unaligned= my_virtual_mem_reserve(&size);
#else
memory_unaligned= my_large_virtual_alloc(&size);
#endif
}
if (!memory_unaligned)
@@ -1370,6 +1374,7 @@ bool buf_pool_t::create() noexcept
#ifdef UNIV_PFS_MEMORY
PSI_MEMORY_CALL(memory_alloc)(mem_key_buf_buf_pool, actual_size, &owner);
#endif
#ifdef _WIN32
if (!my_virtual_mem_commit(memory, actual_size))
{
my_virtual_mem_release(memory_unaligned, size_unaligned);
@@ -1377,6 +1382,9 @@ bool buf_pool_t::create() noexcept
memory_unaligned= nullptr;
goto oom;
}
#else
update_malloc_size(actual_size, 0);
#endif
#ifdef HAVE_LIBNUMA
if (srv_numa_interleave)

Loading…
Cancel
Save