-rw-r--r--   jemalloc/ChangeLog     |  62
-rw-r--r--   jemalloc/src/arena.c   |   4
-rw-r--r--   jemalloc/src/prof.c    |  35
3 files changed, 67 insertions, 34 deletions
diff --git a/jemalloc/ChangeLog b/jemalloc/ChangeLog
index 290dea1..7b7da78 100644
--- a/jemalloc/ChangeLog
+++ b/jemalloc/ChangeLog
@@ -6,6 +6,15 @@ found in the git revision history:
     http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
     git://canonware.com/jemalloc.git
 
+* 2.0.1
+
+  Bug fixes:
+  - Fix a race condition in heap profiling that could cause undefined behavior
+    if opt.prof_accum were disabled.
+  - Add missing mutex unlocks for some OOM error paths in the heap profiling
+    code.
+  - Fix a compilation error for non-C99 builds.
+
 * 2.0.0
 
   This version focuses on the experimental *allocm() API, and on improved
@@ -13,35 +22,34 @@ found in the git revision history:
   improvements are also included.
 
   New features:
-  - Implement the experimental {,r,s,d}allocm() API, which provides a superset
-    of the functionality available via malloc(), calloc(), posix_memalign(),
-    realloc(), malloc_usable_size(), and free(). These functions can be used
-    to allocate/reallocate aligned zeroed memory, ask for optional extra
-    memory during reallocation, prevent object movement during reallocation,
-    etc.
-  - Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is
-    more human-readable, and more flexible. For example:
-      JEMALLOC_OPTIONS=AJP
-    is now:
-      MALLOC_CONF=abort:true,fill:true,stats_print:true
-  - Port to Apple OS X. Sponsored by Mozilla.
-  - Make it possible for the application to control thread-->arena mappings
-    via the "thread.arena" mallctl.
-  - Add compile-time support for all TLS-related functionality via pthreads
-    TSD. This is mainly of interest for OS X, which does not support TLS, but
-    has a TSD implementation with similar performance.
-  - Override memalign() and valloc() if they are provided by the system.
-  - Add the "arenas.purge" mallctl, which can be used to synchronously purge
-    all dirty unused pages.
-  - Make cumulative heap profiling data optional, so that it is possible to
-    limit the amount of memory consumed by heap profiling data structures.
-  - Add per thread allocation counters that can be accessed via the
-    "thread.allocated" and "thread.deallocated" mallctls.
+  - Implement the experimental {,r,s,d}allocm() API, which provides a superset
+    of the functionality available via malloc(), calloc(), posix_memalign(),
+    realloc(), malloc_usable_size(), and free(). These functions can be used to
+    allocate/reallocate aligned zeroed memory, ask for optional extra memory
+    during reallocation, prevent object movement during reallocation, etc.
+  - Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is
+    more human-readable, and more flexible. For example:
+      JEMALLOC_OPTIONS=AJP
+    is now:
+      MALLOC_CONF=abort:true,fill:true,stats_print:true
+  - Port to Apple OS X. Sponsored by Mozilla.
+  - Make it possible for the application to control thread-->arena mappings via
+    the "thread.arena" mallctl.
+  - Add compile-time support for all TLS-related functionality via pthreads TSD.
+    This is mainly of interest for OS X, which does not support TLS, but has a
+    TSD implementation with similar performance.
+  - Override memalign() and valloc() if they are provided by the system.
+  - Add the "arenas.purge" mallctl, which can be used to synchronously purge all
+    dirty unused pages.
+  - Make cumulative heap profiling data optional, so that it is possible to
+    limit the amount of memory consumed by heap profiling data structures.
+  - Add per thread allocation counters that can be accessed via the
+    "thread.allocated" and "thread.deallocated" mallctls.
 
   Incompatible changes:
-  - Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above).
-  - Increase default backtrace depth from 4 to 128 for heap profiling.
-  - Disable interval-based profile dumps by default.
+  - Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above).
+  - Increase default backtrace depth from 4 to 128 for heap profiling.
+  - Disable interval-based profile dumps by default.
 
   Bug fixes:
   - Remove bad assertions in fork handler functions. These assertions could
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index 3d4f888..7f939b3 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -304,8 +304,10 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr)
 static inline void
 arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
 {
+	size_t i;
 	size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT));
-	for (size_t i = 0; i < PAGE_SIZE / sizeof(size_t); i++)
+
+	for (i = 0; i < PAGE_SIZE / sizeof(size_t); i++)
 		assert(p[i] == 0);
 }
 #endif
diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c
index 84ce1ba..636ccce 100644
--- a/jemalloc/src/prof.c
+++ b/jemalloc/src/prof.c
@@ -255,6 +255,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 	} else							\
 		return;
 
+	assert(nignore <= 3);
 	assert(max <= (1U << opt_lg_prof_bt_max));
 
 	BT_FRAME(0)
@@ -398,7 +399,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 	BT_FRAME(126)
 	BT_FRAME(127)
 
-	/* Extras to compensate for NIGNORE. */
+	/* Extras to compensate for nignore. */
 	BT_FRAME(128)
 	BT_FRAME(129)
 	BT_FRAME(130)
@@ -496,8 +497,10 @@ prof_lookup(prof_bt_t *bt)
 			    opt_lg_prof_tcmax));
 			/* Allocate and partially initialize a new cnt. */
 			ret.v = imalloc(sizeof(prof_thr_cnt_t));
-			if (ret.p == NULL)
+			if (ret.p == NULL) {
+				malloc_mutex_unlock(&ctx.p->lock);
 				return (NULL);
+			}
 			ql_elm_new(ret.p, cnts_link);
 			ql_elm_new(ret.p, lru_link);
 		}
@@ -506,6 +509,7 @@ prof_lookup(prof_bt_t *bt)
 		ret.p->epoch = 0;
 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
 		if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
+			malloc_mutex_unlock(&ctx.p->lock);
 			idalloc(ret.v);
 			return (NULL);
 		}
@@ -625,11 +629,14 @@ prof_ctx_destroy(prof_ctx_t *ctx)
 	/*
 	 * Check that ctx is still unused by any thread cache before destroying
 	 * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
-	 * avoid a race condition with this function.
+	 * avoid a race condition with this function, and prof_ctx_merge()
+	 * artificially raises ctx->cnt_merged.curobjs in order to avoid a race
+	 * between the main body of prof_ctx_merge() and entry into this
+	 * function.
 	 */
 	prof_enter();
 	malloc_mutex_lock(&ctx->lock);
-	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0) {
+	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
 		assert(ctx->cnt_merged.curbytes == 0);
 		assert(ctx->cnt_merged.accumobjs == 0);
 		assert(ctx->cnt_merged.accumbytes == 0);
@@ -642,6 +649,8 @@ prof_ctx_destroy(prof_ctx_t *ctx)
 		malloc_mutex_destroy(&ctx->lock);
 		idalloc(ctx);
 	} else {
+		/* Compensate for increment in prof_ctx_merge(). */
+		ctx->cnt_merged.curobjs--;
 		malloc_mutex_unlock(&ctx->lock);
 		prof_leave();
 	}
@@ -660,9 +669,23 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
 	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
 	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
 	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
-	    ctx->cnt_merged.curobjs == 0)
+	    ctx->cnt_merged.curobjs == 0) {
+		/*
+		 * Artificially raise ctx->cnt_merged.curobjs in order to keep
+		 * another thread from winning the race to destroy ctx while
+		 * this one has ctx->lock dropped. Without this, it would be
+		 * possible for another thread to:
+		 *
+		 * 1) Sample an allocation associated with ctx.
+		 * 2) Deallocate the sampled object.
+		 * 3) Successfully prof_ctx_destroy(ctx).
+		 *
+		 * The result would be that ctx no longer exists by the time
+		 * this thread accesses it in prof_ctx_destroy().
+		 */
+		ctx->cnt_merged.curobjs++;
 		destroy = true;
-	else
+	} else
 		destroy = false;
 	malloc_mutex_unlock(&ctx->lock);
 	if (destroy)
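The central fix above is the pairing of prof_ctx_merge() and prof_ctx_destroy(). The standalone C sketch below condenses that pattern with illustrative names (ctx_t, ctx_merge, ctx_destroy); it is not jemalloc's actual code. The merging thread bumps the per-ctx object count as an artificial reference before dropping the lock, and the destroying thread treats a count of 1, rather than 0, as "only the artificial reference remains", decrementing instead of freeing when it loses the race.

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

typedef struct {
	pthread_mutex_t lock;
	unsigned curobjs;	/* stand-in for ctx->cnt_merged.curobjs */
} ctx_t;

/* Merge per-thread counts into ctx and decide whether ctx should die. */
static bool
ctx_merge(ctx_t *ctx)
{
	bool destroy;

	pthread_mutex_lock(&ctx->lock);
	if (ctx->curobjs == 0) {
		/*
		 * Artificial reference: keeps another thread from destroying
		 * ctx during the window in which this thread holds no lock,
		 * between returning from here and entering ctx_destroy().
		 */
		ctx->curobjs++;
		destroy = true;
	} else
		destroy = false;
	pthread_mutex_unlock(&ctx->lock);
	return (destroy);
}

/* Destroy ctx only if it is still unused; otherwise undo the increment. */
static void
ctx_destroy(ctx_t *ctx)
{
	pthread_mutex_lock(&ctx->lock);
	if (ctx->curobjs == 1) {
		/* Only the artificial reference remains; safe to tear down. */
		pthread_mutex_unlock(&ctx->lock);
		pthread_mutex_destroy(&ctx->lock);
		free(ctx);
	} else {
		/* Lost the race; compensate for the increment in ctx_merge(). */
		ctx->curobjs--;
		pthread_mutex_unlock(&ctx->lock);
	}
}

int
main(void)
{
	ctx_t *ctx = malloc(sizeof(ctx_t));

	if (ctx == NULL)
		return (1);
	pthread_mutex_init(&ctx->lock, NULL);
	ctx->curobjs = 0;
	if (ctx_merge(ctx))
		ctx_destroy(ctx);
	return (0);
}

Without the artificial reference, both threads would test against 0, and a concurrent sampler/deallocator could free ctx during the unlocked window, which is the undefined behavior described in the 2.0.1 ChangeLog entry.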
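The prof_lookup() hunks add malloc_mutex_unlock(&ctx.p->lock) on two out-of-memory error paths that previously returned with the lock still held. A minimal sketch of the rule being enforced, using a hypothetical table_insert() and a plain pthreads mutex rather than jemalloc's internals:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/* Hypothetical insert routine; the table itself is elided. */
static void *
table_insert(size_t size)
{
	void *ent;

	pthread_mutex_lock(&table_lock);
	ent = malloc(size);
	if (ent == NULL) {
		/* OOM error path: release the lock before bailing out. */
		pthread_mutex_unlock(&table_lock);
		return (NULL);
	}
	/* ... initialize ent and link it into the table ... */
	pthread_mutex_unlock(&table_lock);
	return (ent);
}

int
main(void)
{
	free(table_insert(64));
	return (0);
}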
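For background on the 2.0.0 notes reproduced in the ChangeLog hunk, the sketch below shows roughly how the experimental *allocm() API was intended to be used. The signatures, flag macros, and return codes are recalled from the jemalloc 2.x manual and should be treated as assumptions rather than a definitive reference.

/* Usage sketch only; verify names against the jemalloc 2.x manual. */
#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	void *p;
	size_t usable;

	/* At least 1000 bytes, 64-byte aligned, zero-filled. */
	if (allocm(&p, &usable, 1000, ALLOCM_ALIGN(64) | ALLOCM_ZERO)
	    != ALLOCM_SUCCESS)
		return (1);
	printf("usable size: %zu\n", usable);

	/* Ask to grow to 2000 bytes, but forbid moving the object. */
	if (rallocm(&p, &usable, 2000, 0, ALLOCM_NO_MOVE) == ALLOCM_SUCCESS)
		printf("grown in place to %zu bytes\n", usable);

	dallocm(p, 0);
	return (0);
}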
