From bbe29d374d0fa5f4684621f16c099294e56c26ef Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 30 Jan 2013 15:03:11 -0800 Subject: Fix potential TLS-related memory corruption. Avoid writing to uninitialized TLS as a side effect of deallocation. Initializing TLS during deallocation is unsafe because it is possible that a thread never did any allocation, and that TLS has already been deallocated by the threads library, resulting in write-after-free corruption. These fixes affect prof_tdata and quarantine; all other uses of TLS are already safe, whether intentionally (as for tcache) or unintentionally (as for arenas). --- ChangeLog | 7 ++++ include/jemalloc/internal/private_namespace.h | 6 +++ include/jemalloc/internal/prof.h | 14 +++---- include/jemalloc/internal/quarantine.h | 43 +++++++++++++++++++++ src/jemalloc.c | 27 ++++++++++++- src/prof.c | 18 +++------ src/quarantine.c | 55 +++++++-------------------- 7 files changed, 105 insertions(+), 65 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6578225..ae7d0bf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,13 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 3.x.x (XXX Not yet released) + + Bug fixes: + - Fix TLS-related memory corruption that could occur during thread exit if the + thread never allocated memory. Only the quarantine and prof facilities were + susceptible. + * 3.3.0 (January 23, 2013) This version includes a few minor performance improvements in addition to the diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 903fb4d..65de316 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -306,7 +306,13 @@ #define prof_tdata_tsd_get_wrapper JEMALLOC_N(prof_tdata_tsd_get_wrapper) #define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set) #define quarantine JEMALLOC_N(quarantine) +#define quarantine_alloc_hook JEMALLOC_N(quarantine_alloc_hook) #define quarantine_boot JEMALLOC_N(quarantine_boot) +#define quarantine_booted JEMALLOC_N(quarantine_booted) +#define quarantine_cleanup JEMALLOC_N(quarantine_cleanup) +#define quarantine_init JEMALLOC_N(quarantine_init) +#define quarantine_tls JEMALLOC_N(quarantine_tls) +#define quarantine_tsd JEMALLOC_N(quarantine_tsd) #define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot) #define quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper) #define quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 47f22ad..119a5b1 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -237,7 +237,7 @@ void prof_postfork_child(void); \ assert(size == s2u(size)); \ \ - prof_tdata = prof_tdata_get(); \ + prof_tdata = prof_tdata_get(true); \ if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \ if (prof_tdata != NULL) \ ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ @@ -286,7 +286,7 @@ void prof_postfork_child(void); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) -prof_tdata_t *prof_tdata_get(void); +prof_tdata_t *prof_tdata_get(bool create); void prof_sample_threshold_update(prof_tdata_t *prof_tdata); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); @@ -304,17 +304,15 @@ malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, prof_tdata_cleanup) 
JEMALLOC_INLINE prof_tdata_t * -prof_tdata_get(void) +prof_tdata_get(bool create) { prof_tdata_t *prof_tdata; cassert(config_prof); prof_tdata = *prof_tdata_tsd_get(); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { - if (prof_tdata == NULL) - prof_tdata = prof_tdata_init(); - } + if (create && prof_tdata == NULL) + prof_tdata = prof_tdata_init(); return (prof_tdata); } @@ -397,7 +395,7 @@ prof_sample_accum_update(size_t size) /* Sampling logic is unnecessary if the interval is 1. */ assert(opt_lg_prof_sample != 0); - prof_tdata = *prof_tdata_tsd_get(); + prof_tdata = prof_tdata_get(false); if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (true); diff --git a/include/jemalloc/internal/quarantine.h b/include/jemalloc/internal/quarantine.h index 38f3d69..16f677f 100644 --- a/include/jemalloc/internal/quarantine.h +++ b/include/jemalloc/internal/quarantine.h @@ -1,6 +1,9 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +typedef struct quarantine_obj_s quarantine_obj_t; +typedef struct quarantine_s quarantine_t; + /* Default per thread quarantine size if valgrind is enabled. */ #define JEMALLOC_VALGRIND_QUARANTINE_DEFAULT (ZU(1) << 24) @@ -8,17 +11,57 @@ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +struct quarantine_obj_s { + void *ptr; + size_t usize; +}; + +struct quarantine_s { + size_t curbytes; + size_t curobjs; + size_t first; +#define LG_MAXOBJS_INIT 10 + size_t lg_maxobjs; + quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. */ +}; + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS +quarantine_t *quarantine_init(size_t lg_maxobjs); void quarantine(void *ptr); +void quarantine_cleanup(void *arg); bool quarantine_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), quarantine, quarantine_t *) + +void quarantine_alloc_hook(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_)) +malloc_tsd_externs(quarantine, quarantine_t *) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, quarantine, quarantine_t *, NULL, + quarantine_cleanup) + +JEMALLOC_ALWAYS_INLINE void +quarantine_alloc_hook(void) +{ + quarantine_t *quarantine; + + assert(config_fill && opt_quarantine); + + quarantine = *quarantine_tsd_get(); + if (quarantine == NULL) + quarantine_init(LG_MAXOBJS_INIT); +} +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index c117685..6f6464d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -282,12 +282,30 @@ arenas_cleanup(void *arg) malloc_mutex_unlock(&arenas_lock); } +static JEMALLOC_ATTR(always_inline) void +malloc_thread_init(void) +{ + + /* + * TSD initialization can't be safely done as a side effect of + * deallocation, because it is possible for a thread to do nothing but + * deallocate its TLS data via free(), in which case writing to TLS + * would cause write-after-free memory corruption. The quarantine + * facility *only* gets used as a side effect of deallocation, so make + * a best effort attempt at initializing its TSD by hooking all + * allocation events. 
+ */ + if (config_fill && opt_quarantine) + quarantine_alloc_hook(); +} + static JEMALLOC_ATTR(always_inline) bool malloc_init(void) { - if (malloc_initialized == false) - return (malloc_init_hard()); + if (malloc_initialized == false && malloc_init_hard()) + return (true); + malloc_thread_init(); return (false); } @@ -1095,6 +1113,7 @@ je_realloc(void *ptr, size_t size) if (size == 0) { if (ptr != NULL) { /* realloc(ptr, 0) is equivalent to free(p). */ + assert(malloc_initialized || IS_INITIALIZER); if (config_prof) { old_size = isalloc(ptr, true); if (config_valgrind && opt_valgrind) @@ -1120,6 +1139,7 @@ je_realloc(void *ptr, size_t size) if (ptr != NULL) { assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); if (config_prof) { old_size = isalloc(ptr, true); @@ -1323,6 +1343,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) size_t ret; assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); if (config_ivsalloc) ret = ivsalloc(ptr, config_prof); @@ -1497,6 +1518,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) assert(size != 0); assert(SIZE_T_MAX - size >= extra); assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); if (arena_ind != UINT_MAX) { arena_chunk_t *chunk; @@ -1611,6 +1633,7 @@ je_sallocm(const void *ptr, size_t *rsize, int flags) size_t sz; assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); if (config_ivsalloc) sz = ivsalloc(ptr, config_prof); diff --git a/src/prof.c b/src/prof.c index b9f03a0..c133b95 100644 --- a/src/prof.c +++ b/src/prof.c @@ -438,7 +438,7 @@ prof_lookup(prof_bt_t *bt) cassert(config_prof); - prof_tdata = prof_tdata_get(); + prof_tdata = prof_tdata_get(false); if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (NULL); @@ -684,7 +684,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) * avoid a race between the main body of prof_ctx_merge() and entry * into this function. */ - prof_tdata = *prof_tdata_tsd_get(); + prof_tdata = prof_tdata_get(false); assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); prof_enter(prof_tdata); malloc_mutex_lock(ctx->lock); @@ -844,7 +844,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) cassert(config_prof); - prof_tdata = prof_tdata_get(); + prof_tdata = prof_tdata_get(false); if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (true); prof_enter(prof_tdata); @@ -966,11 +966,7 @@ prof_idump(void) if (prof_booted == false) return; - /* - * Don't call prof_tdata_get() here, because it could cause recursive - * allocation. - */ - prof_tdata = *prof_tdata_tsd_get(); + prof_tdata = prof_tdata_get(false); if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return; if (prof_tdata->enq) { @@ -1020,11 +1016,7 @@ prof_gdump(void) if (prof_booted == false) return; - /* - * Don't call prof_tdata_get() here, because it could cause recursive - * allocation. - */ - prof_tdata = *prof_tdata_tsd_get(); + prof_tdata = prof_tdata_get(false); if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return; if (prof_tdata->enq) { diff --git a/src/quarantine.c b/src/quarantine.c index 9005ab3..cab7e16 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -1,3 +1,4 @@ +#define JEMALLOC_QUARANTINE_C_ #include "jemalloc/internal/jemalloc_internal.h" /* @@ -11,39 +12,17 @@ /******************************************************************************/ /* Data. 
*/ -typedef struct quarantine_obj_s quarantine_obj_t; -typedef struct quarantine_s quarantine_t; - -struct quarantine_obj_s { - void *ptr; - size_t usize; -}; - -struct quarantine_s { - size_t curbytes; - size_t curobjs; - size_t first; -#define LG_MAXOBJS_INIT 10 - size_t lg_maxobjs; - quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. */ -}; - -static void quarantine_cleanup(void *arg); - -malloc_tsd_data(static, quarantine, quarantine_t *, NULL) -malloc_tsd_funcs(JEMALLOC_INLINE, quarantine, quarantine_t *, NULL, - quarantine_cleanup) +malloc_tsd_data(, quarantine, quarantine_t *, NULL) /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static quarantine_t *quarantine_init(size_t lg_maxobjs); static quarantine_t *quarantine_grow(quarantine_t *quarantine); static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound); /******************************************************************************/ -static quarantine_t * +quarantine_t * quarantine_init(size_t lg_maxobjs) { quarantine_t *quarantine; @@ -119,24 +98,16 @@ quarantine(void *ptr) quarantine = *quarantine_tsd_get(); if ((uintptr_t)quarantine <= (uintptr_t)QUARANTINE_STATE_MAX) { - if (quarantine == NULL) { - if ((quarantine = quarantine_init(LG_MAXOBJS_INIT)) == - NULL) { - idalloc(ptr); - return; - } - } else { - if (quarantine == QUARANTINE_STATE_PURGATORY) { - /* - * Make a note that quarantine() was called - * after quarantine_cleanup() was called. - */ - quarantine = QUARANTINE_STATE_REINCARNATED; - quarantine_tsd_set(&quarantine); - } - idalloc(ptr); - return; + if (quarantine == QUARANTINE_STATE_PURGATORY) { + /* + * Make a note that quarantine() was called after + * quarantine_cleanup() was called. + */ + quarantine = QUARANTINE_STATE_REINCARNATED; + quarantine_tsd_set(&quarantine); } + idalloc(ptr); + return; } /* * Drain one or more objects if the quarantine size limit would be @@ -169,7 +140,7 @@ quarantine(void *ptr) } } -static void +void quarantine_cleanup(void *arg) { quarantine_t *quarantine = *(quarantine_t **)arg; -- cgit v0.12 From d0e942e4669b8600b0bd7e5ae132ae26d10a40ed Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 31 Jan 2013 14:42:41 -0800 Subject: Fix two quarantine bugs. Internal reallocation of the quarantined object array leaked the old array. Reallocation failure for internal reallocation of the quarantined object array (very unlikely) resulted in memory corruption. --- ChangeLog | 5 +++++ src/quarantine.c | 29 +++++++++++++++++++---------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/ChangeLog b/ChangeLog index ae7d0bf..5f2cc45 100644 --- a/ChangeLog +++ b/ChangeLog @@ -12,6 +12,11 @@ found in the git revision history: - Fix TLS-related memory corruption that could occur during thread exit if the thread never allocated memory. Only the quarantine and prof facilities were susceptible. + - Fix two quarantine bugs: + + Internal reallocation of the quarantined object array leaked the old + array. + + Reallocation failure for internal reallocation of the quarantined object + array (very unlikely) resulted in memory corruption. * 3.3.0 (January 23, 2013) diff --git a/src/quarantine.c b/src/quarantine.c index cab7e16..f96a948 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -18,6 +18,7 @@ malloc_tsd_data(, quarantine, quarantine_t *, NULL) /* Function prototypes for non-inline static functions. 
*/ static quarantine_t *quarantine_grow(quarantine_t *quarantine); +static void quarantine_drain_one(quarantine_t *quarantine); static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound); /******************************************************************************/ @@ -47,8 +48,10 @@ quarantine_grow(quarantine_t *quarantine) quarantine_t *ret; ret = quarantine_init(quarantine->lg_maxobjs + 1); - if (ret == NULL) + if (ret == NULL) { + quarantine_drain_one(quarantine); return (quarantine); + } ret->curbytes = quarantine->curbytes; ret->curobjs = quarantine->curobjs; @@ -68,23 +71,29 @@ quarantine_grow(quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } + idalloc(quarantine); return (ret); } static void +quarantine_drain_one(quarantine_t *quarantine) +{ + quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; + assert(obj->usize == isalloc(obj->ptr, config_prof)); + idalloc(obj->ptr); + quarantine->curbytes -= obj->usize; + quarantine->curobjs--; + quarantine->first = (quarantine->first + 1) & ((ZU(1) << + quarantine->lg_maxobjs) - 1); +} + +static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound) { - while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) { - quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; - assert(obj->usize == isalloc(obj->ptr, config_prof)); - idalloc(obj->ptr); - quarantine->curbytes -= obj->usize; - quarantine->curobjs--; - quarantine->first = (quarantine->first + 1) & ((ZU(1) << - quarantine->lg_maxobjs) - 1); - } + while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) + quarantine_drain_one(quarantine); } void -- cgit v0.12 From a7a28c334e5526ba716bf6046eab8d60598183eb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 31 Jan 2013 16:53:58 -0800 Subject: Fix a chunk recycling bug. Fix a chunk recycling bug that could cause the allocator to lose track of whether a chunk was zeroed. On FreeBSD, NetBSD, and OS X, it could cause corruption if allocating via sbrk(2) (unlikely unless running with the "dss:primary" option specified). This was completely harmless on Linux unless using mlockall(2) (and unlikely even then, unless the --disable-munmap configure option or the "dss:primary" option was specified). This regression was introduced in 3.1.0 by the mlockall(2)/madvise(2) interaction fix. --- ChangeLog | 8 ++++++++ src/chunk.c | 1 + 2 files changed, 9 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5f2cc45..ee63cb4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -9,6 +9,14 @@ found in the git revision history: * 3.x.x (XXX Not yet released) Bug fixes: + - Fix a chunk recycling bug that could cause the allocator to lose track of + whether a chunk was zeroed. On FreeBSD, NetBSD, and OS X, it could cause + corruption if allocating via sbrk(2) (unlikely unless running with the + "dss:primary" option specified). This was completely harmless on Linux + unless using mlockall(2) (and unlikely even then, unless the + --disable-munmap configure option or the "dss:primary" option was + specified). This regression was introduced in 3.1.0 by the + mlockall(2)/madvise(2) interaction fix. - Fix TLS-related memory corruption that could occur during thread exit if the thread never allocated memory. Only the quarantine and prof facilities were susceptible. 
diff --git a/src/chunk.c b/src/chunk.c index 46e387e..8cff240 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -111,6 +111,7 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, } node->addr = (void *)((uintptr_t)(ret) + size); node->size = trailsize; + node->zeroed = zeroed; extent_tree_szad_insert(chunks_szad, node); extent_tree_ad_insert(chunks_ad, node); node = NULL; -- cgit v0.12 From 06912756cccd0064a9c5c59992dbac1cec68ba3f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 31 Jan 2013 17:02:53 -0800 Subject: Fix Valgrind integration. Fix Valgrind integration to annotate all internally allocated memory in a way that keeps Valgrind happy about internal data structure access. --- ChangeLog | 2 + include/jemalloc/internal/jemalloc_internal.h.in | 1 + include/jemalloc/internal/tcache.h | 4 +- src/arena.c | 15 ++++---- src/base.c | 3 ++ src/chunk.c | 48 +++++++++++++----------- src/chunk_dss.c | 1 - test/ALLOCM_ARENA.c | 1 + test/thread_arena.c | 1 + 9 files changed, 44 insertions(+), 32 deletions(-) diff --git a/ChangeLog b/ChangeLog index ee63cb4..3c0af68 100644 --- a/ChangeLog +++ b/ChangeLog @@ -25,6 +25,8 @@ found in the git revision history: array. + Reallocation failure for internal reallocation of the quarantined object array (very unlikely) resulted in memory corruption. + - Fix Valgrind integration to annotate all internally allocated memory in a + way that keeps Valgrind happy about internal data structure access. * 3.3.0 (January 23, 2013) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c606c12..6270a08 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -443,6 +443,7 @@ static const bool config_ivsalloc = #define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) #define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) #define VALGRIND_FREELIKE_BLOCK(addr, rzB) +#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) #define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) #define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) #define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 71900c2..ba36204 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -320,8 +320,8 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) } VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); if (config_stats) tbin->tstats.nrequests++; @@ -371,8 +371,8 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } else { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); if (config_stats) tbin->tstats.nrequests++; diff --git a/src/arena.c b/src/arena.c index 8d50f4d..d79e035 100644 --- a/src/arena.c +++ b/src/arena.c @@ -366,8 +366,6 @@ arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages) LG_PAGE)), (npages << LG_PAGE)); memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0, (npages << LG_PAGE)); - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), (npages << LG_PAGE)); } static inline void @@ -380,8 +378,6 @@ arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) LG_PAGE)), PAGE); for (i = 0; i < PAGE / sizeof(size_t); 
i++) assert(p[i] == 0); - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), PAGE); } static void @@ -513,6 +509,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, run_ind+need_pages-1); } } + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), (need_pages << LG_PAGE)); } static arena_chunk_t * @@ -574,6 +572,11 @@ arena_chunk_alloc(arena_t *arena) for (i = map_bias+1; i < chunk_npages-1; i++) arena_mapbits_unzeroed_set(chunk, i, unzeroed); } else if (config_debug) { + VALGRIND_MAKE_MEM_DEFINED( + (void *)arena_mapp_get(chunk, map_bias+1), + (void *)((uintptr_t) + arena_mapp_get(chunk, chunk_npages-1) + - (uintptr_t)arena_mapp_get(chunk, map_bias+1))); for (i = map_bias+1; i < chunk_npages-1; i++) { assert(arena_mapbits_unzeroed_get(chunk, i) == unzeroed); @@ -1246,8 +1249,6 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) (uintptr_t)bin_info->bitmap_offset); /* Initialize run internals. */ - VALGRIND_MAKE_MEM_UNDEFINED(run, bin_info->reg0_offset - - bin_info->redzone_size); run->bin = bin; run->nextind = 0; run->nfree = bin_info->nregs; @@ -1464,8 +1465,8 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) } VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); return (ret); } diff --git a/src/base.c b/src/base.c index b1a5945..4e62e8f 100644 --- a/src/base.c +++ b/src/base.c @@ -63,6 +63,7 @@ base_alloc(size_t size) ret = base_next_addr; base_next_addr = (void *)((uintptr_t)base_next_addr + csize); malloc_mutex_unlock(&base_mtx); + VALGRIND_MAKE_MEM_UNDEFINED(ret, csize); return (ret); } @@ -88,6 +89,7 @@ base_node_alloc(void) ret = base_nodes; base_nodes = *(extent_node_t **)ret; malloc_mutex_unlock(&base_mtx); + VALGRIND_MAKE_MEM_UNDEFINED(ret, sizeof(extent_node_t)); } else { malloc_mutex_unlock(&base_mtx); ret = (extent_node_t *)base_alloc(sizeof(extent_node_t)); @@ -100,6 +102,7 @@ void base_node_dealloc(extent_node_t *node) { + VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); malloc_mutex_lock(&base_mtx); *(extent_node_t **)node = base_nodes; base_nodes = node; diff --git a/src/chunk.c b/src/chunk.c index 8cff240..044f76b 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -120,7 +120,6 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, if (node != NULL) base_node_dealloc(node); - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); if (*zero) { if (zeroed == false) memset(ret, 0, size); @@ -131,7 +130,6 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } } return (ret); @@ -180,27 +178,32 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, /* All strategies for allocation failed. 
*/ ret = NULL; label_return: - if (config_ivsalloc && base == false && ret != NULL) { - if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { - chunk_dealloc(ret, size, true); - return (NULL); + if (ret != NULL) { + if (config_ivsalloc && base == false) { + if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { + chunk_dealloc(ret, size, true); + return (NULL); + } } - } - if ((config_stats || config_prof) && ret != NULL) { - bool gdump; - malloc_mutex_lock(&chunks_mtx); - if (config_stats) - stats_chunks.nchunks += (size / chunksize); - stats_chunks.curchunks += (size / chunksize); - if (stats_chunks.curchunks > stats_chunks.highchunks) { - stats_chunks.highchunks = stats_chunks.curchunks; - if (config_prof) - gdump = true; - } else if (config_prof) - gdump = false; - malloc_mutex_unlock(&chunks_mtx); - if (config_prof && opt_prof && opt_prof_gdump && gdump) - prof_gdump(); + if (config_stats || config_prof) { + bool gdump; + malloc_mutex_lock(&chunks_mtx); + if (config_stats) + stats_chunks.nchunks += (size / chunksize); + stats_chunks.curchunks += (size / chunksize); + if (stats_chunks.curchunks > stats_chunks.highchunks) { + stats_chunks.highchunks = + stats_chunks.curchunks; + if (config_prof) + gdump = true; + } else if (config_prof) + gdump = false; + malloc_mutex_unlock(&chunks_mtx); + if (config_prof && opt_prof && opt_prof_gdump && gdump) + prof_gdump(); + } + if (config_valgrind) + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } assert(CHUNK_ADDR2BASE(ret) == ret); return (ret); @@ -214,6 +217,7 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, extent_node_t *xnode, *node, *prev, key; unzeroed = pages_purge(chunk, size); + VALGRIND_MAKE_MEM_NOACCESS(chunk, size); /* * Allocate a node before acquiring chunks_mtx even though it might not diff --git a/src/chunk_dss.c b/src/chunk_dss.c index d1aea93..24781cc 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -127,7 +127,6 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) if (*zero) { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } return (ret); } diff --git a/test/ALLOCM_ARENA.c b/test/ALLOCM_ARENA.c index 1585690..2c52485 100644 --- a/test/ALLOCM_ARENA.c +++ b/test/ALLOCM_ARENA.c @@ -41,6 +41,7 @@ je_thread_start(void *arg) malloc_printf("Unexpected allocm() error\n"); abort(); } + dallocm(p, 0); return (NULL); } diff --git a/test/thread_arena.c b/test/thread_arena.c index 2ffdb5e..c5a21fa 100644 --- a/test/thread_arena.c +++ b/test/thread_arena.c @@ -17,6 +17,7 @@ je_thread_start(void *arg) malloc_printf("%s(): Error in malloc()\n", __func__); return (void *)1; } + free(p); size = sizeof(arena_ind); if ((err = mallctl("thread.arena", &arena_ind, &size, &main_arena_ind, -- cgit v0.12 From 88c222c8e91499bf5d3fba53b24222df0cda5771 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Feb 2013 11:59:30 -0800 Subject: Fix a prof-related locking order bug. Fix a locking order bug that could cause deadlock during fork if heap profiling were enabled. 
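The fix has two cooperating parts, both visible in the hunks below: the
arena_prof_accum*() functions now return a flag so their callers invoke
prof_idump() only after the arena mutex has been dropped, and the fork
handlers acquire the profiling locks before any arena lock (releasing
them in the reverse order afterward). A minimal sketch of that
discipline, with hypothetical locks rather than jemalloc's actual mutex
set:

    #include <pthread.h>

    static pthread_mutex_t prof_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t arena_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Acquire every allocator lock in one fixed order: prof, then arena. */
    static void
    prefork(void)
    {
            pthread_mutex_lock(&prof_lock);
            pthread_mutex_lock(&arena_lock);
    }

    /* Release in reverse order; used for both the parent and the child. */
    static void
    postfork(void)
    {
            pthread_mutex_unlock(&arena_lock);
            pthread_mutex_unlock(&prof_lock);
    }

    static void
    install_fork_handlers(void)
    {
            pthread_atfork(prefork, postfork, postfork);
    }

Once every path honors a single global order, a thread that forks can no
longer meet another thread holding an arena lock while waiting on a prof
lock, which is the cycle that produced the deadlock.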
--- ChangeLog | 2 ++ include/jemalloc/internal/arena.h | 33 ++++++++++++++++++++------------- src/arena.c | 13 ++++++++----- src/jemalloc.c | 6 +++--- src/tcache.c | 15 +++++++++++---- 5 files changed, 44 insertions(+), 25 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3c0af68..bf96306 100644 --- a/ChangeLog +++ b/ChangeLog @@ -9,6 +9,8 @@ found in the git revision history: * 3.x.x (XXX Not yet released) Bug fixes: + - Fix a locking order bug that could cause deadlock during fork if heap + profiling were enabled. - Fix a chunk recycling bug that could cause the allocator to lose track of whether a chunk was zeroed. On FreeBSD, NetBSD, and OS X, it could cause corruption if allocating via sbrk(2) (unlikely unless running with the diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 8fdee93..f2c18f4 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -463,9 +463,9 @@ void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, size_t binind, size_t flags); void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, size_t unzeroed); -void arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); -void arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); -void arena_prof_accum(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, @@ -663,7 +663,7 @@ arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, *mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed; } -JEMALLOC_INLINE void +JEMALLOC_INLINE bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) { @@ -672,33 +672,40 @@ arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) arena->prof_accumbytes += accumbytes; if (arena->prof_accumbytes >= prof_interval) { - prof_idump(); arena->prof_accumbytes -= prof_interval; + return (true); } + return (false); } -JEMALLOC_INLINE void +JEMALLOC_INLINE bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) { cassert(config_prof); if (prof_interval == 0) - return; - arena_prof_accum_impl(arena, accumbytes); + return (false); + return (arena_prof_accum_impl(arena, accumbytes)); } -JEMALLOC_INLINE void +JEMALLOC_INLINE bool arena_prof_accum(arena_t *arena, uint64_t accumbytes) { cassert(config_prof); if (prof_interval == 0) - return; - malloc_mutex_lock(&arena->lock); - arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(&arena->lock); + return (false); + + { + bool ret; + + malloc_mutex_lock(&arena->lock); + ret = arena_prof_accum_impl(arena, accumbytes); + malloc_mutex_unlock(&arena->lock); + return (ret); + } } JEMALLOC_ALWAYS_INLINE size_t diff --git a/src/arena.c b/src/arena.c index d79e035..05a787f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1338,8 +1338,8 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, assert(tbin->ncached == 0); - if (config_prof) - arena_prof_accum(arena, prof_accumbytes); + if (config_prof && arena_prof_accum(arena, prof_accumbytes)) + prof_idump(); bin = &arena->bins[binind]; malloc_mutex_lock(&bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> @@ -1447,8 +1447,8 @@ 
arena_malloc_small(arena_t *arena, size_t size, bool zero) bin->stats.nrequests++; } malloc_mutex_unlock(&bin->lock); - if (config_prof && isthreaded == false) - arena_prof_accum(arena, size); + if (config_prof && isthreaded == false && arena_prof_accum(arena, size)) + prof_idump(); if (zero == false) { if (config_fill) { @@ -1475,6 +1475,7 @@ void * arena_malloc_large(arena_t *arena, size_t size, bool zero) { void *ret; + UNUSED bool idump; /* Large allocation. */ size = PAGE_CEILING(size); @@ -1493,8 +1494,10 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } if (config_prof) - arena_prof_accum_locked(arena, size); + idump = arena_prof_accum_locked(arena, size); malloc_mutex_unlock(&arena->lock); + if (config_prof && idump) + prof_idump(); if (zero == false) { if (config_fill) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 6f6464d..bc350ed 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1753,12 +1753,12 @@ _malloc_prefork(void) /* Acquire all mutexes in a safe order. */ ctl_prefork(); + prof_prefork(); malloc_mutex_prefork(&arenas_lock); for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_prefork(arenas[i]); } - prof_prefork(); chunk_prefork(); base_prefork(); huge_prefork(); @@ -1784,12 +1784,12 @@ _malloc_postfork(void) huge_postfork_parent(); base_postfork_parent(); chunk_postfork_parent(); - prof_postfork_parent(); for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_postfork_parent(arenas[i]); } malloc_mutex_postfork_parent(&arenas_lock); + prof_postfork_parent(); ctl_postfork_parent(); } @@ -1804,12 +1804,12 @@ jemalloc_postfork_child(void) huge_postfork_child(); base_postfork_child(); chunk_postfork_child(); - prof_postfork_child(); for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_postfork_child(arenas[i]); } malloc_mutex_postfork_child(&arenas_lock); + prof_postfork_child(); ctl_postfork_child(); } diff --git a/src/tcache.c b/src/tcache.c index 7befdc8..98ed19e 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -97,7 +97,8 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, arena_bin_t *bin = &arena->bins[binind]; if (config_prof && arena == tcache->arena) { - arena_prof_accum(arena, tcache->prof_accumbytes); + if (arena_prof_accum(arena, tcache->prof_accumbytes)) + prof_idump(); tcache->prof_accumbytes = 0; } @@ -174,11 +175,14 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( tbin->avail[0]); arena_t *arena = chunk->arena; + UNUSED bool idump; + if (config_prof) + idump = false; malloc_mutex_lock(&arena->lock); if ((config_prof || config_stats) && arena == tcache->arena) { if (config_prof) { - arena_prof_accum_locked(arena, + idump = arena_prof_accum_locked(arena, tcache->prof_accumbytes); tcache->prof_accumbytes = 0; } @@ -210,6 +214,8 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, } } malloc_mutex_unlock(&arena->lock); + if (config_prof && idump) + prof_idump(); } if (config_stats && merged_stats == false) { /* @@ -341,8 +347,9 @@ tcache_destroy(tcache_t *tcache) } } - if (config_prof && tcache->prof_accumbytes > 0) - arena_prof_accum(tcache->arena, tcache->prof_accumbytes); + if (config_prof && tcache->prof_accumbytes > 0 && + arena_prof_accum(tcache->arena, tcache->prof_accumbytes)) + prof_idump(); tcache_size = arena_salloc(tcache, false); if (tcache_size <= SMALL_MAXCLASS) { -- cgit v0.12 From 
9f9897ad4275e540cf1bea5a6de762c809b7695c Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 28 Jan 2013 15:19:34 -0500 Subject: fix building for s390 systems Checking for __s390x__ means you work on s390x, but not s390 (32bit) systems. So use __s390__ which works for both. With this, `make check` passes on s390. Signed-off-by: Mike Frysinger --- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 6270a08..381bd60 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -287,7 +287,7 @@ static const bool config_ivsalloc = # ifdef __powerpc__ # define LG_QUANTUM 4 # endif -# ifdef __s390x__ +# ifdef __s390__ # define LG_QUANTUM 4 # endif # ifdef __SH4__ -- cgit v0.12 From a4915851577c948cf9051d60152944ca2d1b2d59 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Mar 2013 11:19:31 -0800 Subject: Add no-op bodies to VALGRIND_*() macro stubs. Add no-op bodies to VALGRIND_*() macro stubs so that they can be used in contexts like the following without generating a compiler warning about the 'if' statement having an empty body: if (config_valgrind) VALGRIND_MAKE_MEM_UNDEFINED(ret, size); --- include/jemalloc/internal/jemalloc_internal.h.in | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 381bd60..50d84ca 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -440,16 +440,18 @@ static const bool config_ivsalloc = } while (0) #else #define RUNNING_ON_VALGRIND ((unsigned)0) -#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) -#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) -#define VALGRIND_FREELIKE_BLOCK(addr, rzB) -#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) -#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) -#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) +#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ + do {} while (0) +#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) \ + do {} while (0) +#define VALGRIND_FREELIKE_BLOCK(addr, rzB) do {} while (0) +#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) do {} while (0) +#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) do {} while (0) +#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) do {} while (0) +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) #define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ - old_rzsize, zero) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) + old_rzsize, zero) do {} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) #endif #include "jemalloc/internal/util.h" -- cgit v0.12 From 2298835e70ae79577a1c691020874bb11eefc039 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Mar 2013 11:11:17 -0800 Subject: Update ChangeLog for 3.3.1. 
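Among the fixes this release documents, the VALGRIND_*() stub change two
patches up relies on a classic C idiom worth spelling out: an empty macro
expansion leaves a bare semicolon as the body of any `if` that calls it,
which modern compilers flag (e.g. via -Wempty-body), whereas
`do {} while (0)` turns the stub into a genuine statement that still
requires its trailing semicolon. A standalone illustration using a
hypothetical LOG macro, not code from the tree:

    /* Empty expansion: `if (debug) LOG_EMPTY("x");` becomes `if (debug) ;`. */
    #define LOG_EMPTY(msg)

    /* Statement-like expansion: binds to the `if` as one real statement. */
    #define LOG_NOOP(msg) do {} while (0)

    int
    main(void)
    {
            int debug = 0;

            if (debug)
                    LOG_EMPTY("x");     /* empty body: compiler warning */
            if (debug)
                    LOG_NOOP("x");      /* compiles cleanly */
            return (0);
    }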
--- ChangeLog | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index bf96306..fc096d8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,7 +6,10 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git -* 3.x.x (XXX Not yet released) +* 3.3.1 (March 6, 2013) + + This version fixes bugs that are typically encountered only when utilizing + custom run-time options. Bug fixes: - Fix a locking order bug that could cause deadlock during fork if heap @@ -29,6 +32,7 @@ found in the git revision history: array (very unlikely) resulted in memory corruption. - Fix Valgrind integration to annotate all internally allocated memory in a way that keeps Valgrind happy about internal data structure access. + - Fix building for s390 systems. * 3.3.0 (January 23, 2013) -- cgit v0.12
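As a closing illustration of the headline fix in this series, the sketch
below restates the TLS hazard from the first patch. It uses POSIX
thread-specific data as a stand-in for the compiler-managed TLS actually
involved, and every name in it is hypothetical rather than taken from
the tree:

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_key_t cache_key;     /* created once at process startup */

    static void
    cache_key_init(void)
    {
            pthread_key_create(&cache_key, free);
    }

    /*
     * Unsafe, the pattern the series removes: lazily creating per-thread
     * state on the deallocation path. A thread that never allocated can
     * still reach free() from a thread-exit destructor, after the threads
     * library has torn down its TLS, so the lazy write lands in memory
     * already returned to the system: write-after-free.
     */
    static void
    free_hook_unsafe(void *ptr)
    {
            if (pthread_getspecific(cache_key) == NULL)
                    pthread_setspecific(cache_key, malloc(64));
            free(ptr);
    }

    /*
     * Safe, the pattern the series adopts: per-thread state is created
     * only on allocation paths (compare quarantine_alloc_hook() and
     * malloc_thread_init() in the first patch), and the deallocation
     * path simply tolerates a missing slot.
     */
    static void
    free_hook_safe(void *ptr)
    {
            if (pthread_getspecific(cache_key) != NULL) {
                    /* Per-thread state exists; it is safe to use here. */
            }
            free(ptr);
    }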