malloc: Rewrite dumped heap for compatibility in __malloc_set_state
This will allow us to change many aspects of the malloc implementation while preserving compatibility with existing Emacs binaries. As a result, existing Emacs binaries will have a larger RSS, and Emacs needs a few more milliseconds to start. This overhead is specific to Emacs (and will go away once Emacs switches to its internal malloc). The new checks to make free and realloc compatible with the dumped heap are confined to the mmap paths, which are already quite slow due to the munmap overhead. This commit weakens some security checks, but only for heap pointers in the dumped main arena. By default, this area is empty, so those checks are as effective as before.
This commit is contained in:
parent
567c710bca
commit
4cf6c72fd2
14
ChangeLog
14
ChangeLog
@ -1,3 +1,17 @@
|
||||
2016-05-13 Florian Weimer <fweimer@redhat.com>
|
||||
|
||||
* malloc/malloc.c (dumped_main_arena_start)
|
||||
(dumped_main_arena_end): New variables.
|
||||
(DUMPED_MAIN_ARENA_CHUNK): New macro.
|
||||
(do_check_chunk): Skip dumped fake mmapped chunks.
|
||||
(munmap_chunk): Likewise.
|
||||
(__libc_free): Do not adjust statistics for fake mmapped chunks.
|
||||
(__libc_realloc): Adjust hardening check. Always copy dumped fake
|
||||
mmapped chunks.
|
||||
* malloc/hooks.c (__malloc_set_state): Do not update main_arena.
|
||||
Mark dumped chunks as using mmap. Update dumped_main_arena_start
|
||||
and dumped_main_arena_end to cover the dumped heap.
|
||||
|
||||
2016-05-12 Joseph Myers <joseph@codesourcery.com>
|
||||
|
||||
* conform/data/time.h-data (getdate_err): Do not expect for
|
||||
|
156
malloc/hooks.c
156
malloc/hooks.c
@ -548,11 +548,7 @@ int
|
||||
__malloc_set_state (void *msptr)
|
||||
{
|
||||
struct malloc_save_state *ms = (struct malloc_save_state *) msptr;
|
||||
size_t i;
|
||||
mbinptr b;
|
||||
|
||||
disallow_malloc_check = 1;
|
||||
ptmalloc_init ();
|
||||
if (ms->magic != MALLOC_STATE_MAGIC)
|
||||
return -1;
|
||||
|
||||
@ -560,106 +556,60 @@ __malloc_set_state (void *msptr)
|
||||
if ((ms->version & ~0xffl) > (MALLOC_STATE_VERSION & ~0xffl))
|
||||
return -2;
|
||||
|
||||
(void) mutex_lock (&main_arena.mutex);
|
||||
/* There are no fastchunks. */
|
||||
clear_fastchunks (&main_arena);
|
||||
if (ms->version >= 4)
|
||||
set_max_fast (ms->max_fast);
|
||||
else
|
||||
set_max_fast (64); /* 64 used to be the value we always used. */
|
||||
for (i = 0; i < NFASTBINS; ++i)
|
||||
fastbin (&main_arena, i) = 0;
|
||||
for (i = 0; i < BINMAPSIZE; ++i)
|
||||
main_arena.binmap[i] = 0;
|
||||
top (&main_arena) = ms->av[2];
|
||||
main_arena.last_remainder = 0;
|
||||
for (i = 1; i < NBINS; i++)
|
||||
{
|
||||
b = bin_at (&main_arena, i);
|
||||
if (ms->av[2 * i + 2] == 0)
|
||||
{
|
||||
assert (ms->av[2 * i + 3] == 0);
|
||||
first (b) = last (b) = b;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ms->version >= 3 &&
|
||||
(i < NSMALLBINS || (largebin_index (chunksize (ms->av[2 * i + 2])) == i &&
|
||||
largebin_index (chunksize (ms->av[2 * i + 3])) == i)))
|
||||
{
|
||||
first (b) = ms->av[2 * i + 2];
|
||||
last (b) = ms->av[2 * i + 3];
|
||||
/* Make sure the links to the bins within the heap are correct. */
|
||||
first (b)->bk = b;
|
||||
last (b)->fd = b;
|
||||
/* Set bit in binblocks. */
|
||||
mark_bin (&main_arena, i);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Oops, index computation from chunksize must have changed.
|
||||
Link the whole list into unsorted_chunks. */
|
||||
first (b) = last (b) = b;
|
||||
b = unsorted_chunks (&main_arena);
|
||||
ms->av[2 * i + 2]->bk = b;
|
||||
ms->av[2 * i + 3]->fd = b->fd;
|
||||
b->fd->bk = ms->av[2 * i + 3];
|
||||
b->fd = ms->av[2 * i + 2];
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ms->version < 3)
|
||||
{
|
||||
/* Clear fd_nextsize and bk_nextsize fields. */
|
||||
b = unsorted_chunks (&main_arena)->fd;
|
||||
while (b != unsorted_chunks (&main_arena))
|
||||
{
|
||||
if (!in_smallbin_range (chunksize (b)))
|
||||
{
|
||||
b->fd_nextsize = NULL;
|
||||
b->bk_nextsize = NULL;
|
||||
}
|
||||
b = b->fd;
|
||||
}
|
||||
}
|
||||
mp_.sbrk_base = ms->sbrk_base;
|
||||
main_arena.system_mem = ms->sbrked_mem_bytes;
|
||||
mp_.trim_threshold = ms->trim_threshold;
|
||||
mp_.top_pad = ms->top_pad;
|
||||
mp_.n_mmaps_max = ms->n_mmaps_max;
|
||||
mp_.mmap_threshold = ms->mmap_threshold;
|
||||
check_action = ms->check_action;
|
||||
main_arena.max_system_mem = ms->max_sbrked_mem;
|
||||
mp_.n_mmaps = ms->n_mmaps;
|
||||
mp_.max_n_mmaps = ms->max_n_mmaps;
|
||||
mp_.mmapped_mem = ms->mmapped_mem;
|
||||
mp_.max_mmapped_mem = ms->max_mmapped_mem;
|
||||
/* add version-dependent code here */
|
||||
if (ms->version >= 1)
|
||||
{
|
||||
/* Check whether it is safe to enable malloc checking, or whether
|
||||
it is necessary to disable it. */
|
||||
if (ms->using_malloc_checking && !using_malloc_checking &&
|
||||
!disallow_malloc_check)
|
||||
__malloc_check_init ();
|
||||
else if (!ms->using_malloc_checking && using_malloc_checking)
|
||||
{
|
||||
__malloc_hook = NULL;
|
||||
__free_hook = NULL;
|
||||
__realloc_hook = NULL;
|
||||
__memalign_hook = NULL;
|
||||
using_malloc_checking = 0;
|
||||
}
|
||||
}
|
||||
if (ms->version >= 4)
|
||||
{
|
||||
mp_.arena_test = ms->arena_test;
|
||||
mp_.arena_max = ms->arena_max;
|
||||
narenas = ms->narenas;
|
||||
}
|
||||
check_malloc_state (&main_arena);
|
||||
/* We do not need to perform locking here because __malloc_set_state
|
||||
must be called before the first call into the malloc subsytem
|
||||
(usually via __malloc_initialize_hook). pthread_create always
|
||||
calls calloc and thus must be called only afterwards, so there
|
||||
cannot be more than one thread when we reach this point. */
|
||||
|
||||
/* Disable the malloc hooks (and malloc checking). */
|
||||
__malloc_hook = NULL;
|
||||
__realloc_hook = NULL;
|
||||
__free_hook = NULL;
|
||||
__memalign_hook = NULL;
|
||||
using_malloc_checking = 0;
|
||||
|
||||
/* Patch the dumped heap. We no longer try to integrate into the
|
||||
existing heap. Instead, we mark the existing chunks as mmapped.
|
||||
Together with the update to dumped_main_arena_start and
|
||||
dumped_main_arena_end, realloc and free will recognize these
|
||||
chunks as dumped fake mmapped chunks and never free them. */
|
||||
|
||||
/* Find the chunk with the lowest address with the heap. */
|
||||
mchunkptr chunk = NULL;
|
||||
{
|
||||
size_t *candidate = (size_t *) ms->sbrk_base;
|
||||
size_t *end = (size_t *) (ms->sbrk_base + ms->sbrked_mem_bytes);
|
||||
while (candidate < end)
|
||||
if (*candidate != 0)
|
||||
{
|
||||
chunk = mem2chunk ((void *) (candidate + 1));
|
||||
break;
|
||||
}
|
||||
else
|
||||
++candidate;
|
||||
}
|
||||
if (chunk == NULL)
|
||||
return 0;
|
||||
|
||||
/* Iterate over the dumped heap and patch the chunks so that they
|
||||
are treated as fake mmapped chunks. */
|
||||
mchunkptr top = ms->av[2];
|
||||
while (chunk < top)
|
||||
{
|
||||
if (inuse (chunk))
|
||||
{
|
||||
/* Mark chunk as mmapped, to trigger the fallback path. */
|
||||
size_t size = chunksize (chunk);
|
||||
set_head (chunk, size | IS_MMAPPED);
|
||||
}
|
||||
chunk = next_chunk (chunk);
|
||||
}
|
||||
|
||||
/* The dumped fake mmapped chunks all lie in this address range. */
|
||||
dumped_main_arena_start = (mchunkptr) ms->sbrk_base;
|
||||
dumped_main_arena_end = top;
|
||||
|
||||
(void) mutex_unlock (&main_arena.mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1758,6 +1758,17 @@ static struct malloc_state main_arena =
|
||||
.attached_threads = 1
|
||||
};
|
||||
|
||||
/* These variables are used for undumping support. Chunked are marked
|
||||
as using mmap, but we leave them alone if they fall into this
|
||||
range. */
|
||||
static mchunkptr dumped_main_arena_start; /* Inclusive. */
|
||||
static mchunkptr dumped_main_arena_end; /* Exclusive. */
|
||||
|
||||
/* True if the pointer falls into the dumped arena. Use this after
|
||||
chunk_is_mmapped indicates a chunk is mmapped. */
|
||||
#define DUMPED_MAIN_ARENA_CHUNK(p) \
|
||||
((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
|
||||
|
||||
/* There is only one instance of the malloc parameters. */
|
||||
|
||||
static struct malloc_par mp_ =
|
||||
@ -1947,7 +1958,7 @@ do_check_chunk (mstate av, mchunkptr p)
|
||||
assert (prev_inuse (p));
|
||||
}
|
||||
}
|
||||
else
|
||||
else if (!DUMPED_MAIN_ARENA_CHUNK (p))
|
||||
{
|
||||
/* address is outside main heap */
|
||||
if (contiguous (av) && av->top != initial_top (av))
|
||||
@ -2822,6 +2833,11 @@ munmap_chunk (mchunkptr p)
|
||||
|
||||
assert (chunk_is_mmapped (p));
|
||||
|
||||
/* Do nothing if the chunk is a faked mmapped chunk in the dumped
|
||||
main arena. We never free this memory. */
|
||||
if (DUMPED_MAIN_ARENA_CHUNK (p))
|
||||
return;
|
||||
|
||||
uintptr_t block = (uintptr_t) p - p->prev_size;
|
||||
size_t total_size = p->prev_size + size;
|
||||
/* Unfortunately we have to do the compilers job by hand here. Normally
|
||||
@ -2942,10 +2958,12 @@ __libc_free (void *mem)
|
||||
|
||||
if (chunk_is_mmapped (p)) /* release mmapped memory. */
|
||||
{
|
||||
/* see if the dynamic brk/mmap threshold needs adjusting */
|
||||
/* See if the dynamic brk/mmap threshold needs adjusting.
|
||||
Dumped fake mmapped chunks do not affect the threshold. */
|
||||
if (!mp_.no_dyn_threshold
|
||||
&& p->size > mp_.mmap_threshold
|
||||
&& p->size <= DEFAULT_MMAP_THRESHOLD_MAX)
|
||||
&& p->size <= DEFAULT_MMAP_THRESHOLD_MAX
|
||||
&& !DUMPED_MAIN_ARENA_CHUNK (p))
|
||||
{
|
||||
mp_.mmap_threshold = chunksize (p);
|
||||
mp_.trim_threshold = 2 * mp_.mmap_threshold;
|
||||
@ -2995,12 +3013,15 @@ __libc_realloc (void *oldmem, size_t bytes)
|
||||
else
|
||||
ar_ptr = arena_for_chunk (oldp);
|
||||
|
||||
/* Little security check which won't hurt performance: the
|
||||
allocator never wrapps around at the end of the address space.
|
||||
Therefore we can exclude some size values which might appear
|
||||
here by accident or by "design" from some intruder. */
|
||||
if (__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
|
||||
|| __builtin_expect (misaligned_chunk (oldp), 0))
|
||||
/* Little security check which won't hurt performance: the allocator
|
||||
never wrapps around at the end of the address space. Therefore
|
||||
we can exclude some size values which might appear here by
|
||||
accident or by "design" from some intruder. We need to bypass
|
||||
this check for dumped fake mmap chunks from the old main arena
|
||||
because the new malloc may provide additional alignment. */
|
||||
if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
|
||||
|| __builtin_expect (misaligned_chunk (oldp), 0))
|
||||
&& !DUMPED_MAIN_ARENA_CHUNK (oldp))
|
||||
{
|
||||
malloc_printerr (check_action, "realloc(): invalid pointer", oldmem,
|
||||
ar_ptr);
|
||||
@ -3011,6 +3032,22 @@ __libc_realloc (void *oldmem, size_t bytes)
|
||||
|
||||
if (chunk_is_mmapped (oldp))
|
||||
{
|
||||
/* If this is a faked mmapped chunk from the dumped main arena,
|
||||
always make a copy (and do not free the old chunk). */
|
||||
if (DUMPED_MAIN_ARENA_CHUNK (oldp))
|
||||
{
|
||||
/* Must alloc, copy, free. */
|
||||
void *newmem = __libc_malloc (bytes);
|
||||
if (newmem == 0)
|
||||
return NULL;
|
||||
/* Copy as many bytes as are available from the old chunk
|
||||
and fit into the new size. */
|
||||
if (bytes > oldsize - 2 * SIZE_SZ)
|
||||
bytes = oldsize - 2 * SIZE_SZ;
|
||||
memcpy (newmem, oldmem, bytes);
|
||||
return newmem;
|
||||
}
|
||||
|
||||
void *newmem;
|
||||
|
||||
#if HAVE_MREMAP
|
||||
|
Loading…
x
Reference in New Issue
Block a user