diff options
author | Qi Wang <interwq@gwu.edu> | 2015-10-27 22:12:10 (GMT) |
---|---|---|
committer | Jason Evans <je@fb.com> | 2015-11-10 22:28:34 (GMT) |
commit | f4a0f32d340985de477bbe329ecdaecd69ed1055 (patch) | |
tree | a148610f4d2253186c59e671dcb065ce3647d2f5 /src/jemalloc.c | |
parent | 710ca112e31e8621177d08162f60158c27dd2974 (diff) | |
download | jemalloc-f4a0f32d340985de477bbe329ecdaecd69ed1055.zip jemalloc-f4a0f32d340985de477bbe329ecdaecd69ed1055.tar.gz jemalloc-f4a0f32d340985de477bbe329ecdaecd69ed1055.tar.bz2 |
Fast-path improvement: reduce # of branches and unnecessary operations.
- Combine multiple runtime branches into a single malloc_slow check.
- Avoid calling arena_choose / size2index / index2size on fast path.
- A few micro optimizations.
Diffstat (limited to 'src/jemalloc.c')
-rw-r--r-- | src/jemalloc.c | 186 |
1 file changed, 133 insertions, 53 deletions
diff --git a/src/jemalloc.c b/src/jemalloc.c index 5a2d324..eed6331 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -70,12 +70,29 @@ typedef enum { } malloc_init_t; static malloc_init_t malloc_init_state = malloc_init_uninitialized; +/* 0 should be the common case. Set to true to trigger initialization. */ +static bool malloc_slow = true; + +/* When malloc_slow != 0, set the corresponding bits for sanity check. */ +enum { + flag_opt_junk_alloc = (1U), + flag_opt_junk_free = (1U << 1), + flag_opt_quarantine = (1U << 2), + flag_opt_zero = (1U << 3), + flag_opt_utrace = (1U << 4), + flag_in_valgrind = (1U << 5), + flag_opt_xmalloc = (1U << 6) +}; +static uint8_t malloc_slow_flags; + +/* Last entry for overflow detection only. */ JEMALLOC_ALIGNED(CACHELINE) -const size_t index2size_tab[NSIZES] = { +const size_t index2size_tab[NSIZES+1] = { #define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ ((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)), SIZE_CLASSES #undef SC + ZU(0) }; JEMALLOC_ALIGNED(CACHELINE) @@ -309,14 +326,15 @@ a0ialloc(size_t size, bool zero, bool is_metadata) if (unlikely(malloc_init_a0())) return (NULL); - return (iallocztm(NULL, size, zero, false, is_metadata, a0get())); + return (iallocztm(NULL, size, size2index(size), zero, false, + is_metadata, a0get(), true)); } static void a0idalloc(void *ptr, bool is_metadata) { - idalloctm(NULL, ptr, false, is_metadata); + idalloctm(NULL, ptr, false, is_metadata, true); } void * @@ -839,6 +857,26 @@ malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, } static void +malloc_slow_flag_init(void) +{ + /* + * Combine the runtime options into malloc_slow for fast path. Called + * after processing all the options. + */ + malloc_slow_flags |= (opt_junk_alloc ? flag_opt_junk_alloc : 0) + | (opt_junk_free ? flag_opt_junk_free : 0) + | (opt_quarantine ? flag_opt_quarantine : 0) + | (opt_zero ? flag_opt_zero : 0) + | (opt_utrace ? flag_opt_utrace : 0) + | (opt_xmalloc ? 
flag_opt_xmalloc : 0); + + if (config_valgrind) + malloc_slow_flags |= (in_valgrind ? flag_in_valgrind : 0); + + malloc_slow = (malloc_slow_flags != 0); +} + +static void malloc_conf_init(void) { unsigned i; @@ -1304,6 +1342,8 @@ malloc_init_hard_finish(void) arenas[0] = a0; malloc_init_state = malloc_init_initialized; + malloc_slow_flag_init(); + return (false); } @@ -1355,34 +1395,36 @@ malloc_init_hard(void) */ static void * -imalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) +imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, + prof_tctx_t *tctx, bool slow_path) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = imalloc(tsd, LARGE_MINCLASS); + szind_t ind_large = size2index(LARGE_MINCLASS); + p = imalloc(tsd, LARGE_MINCLASS, ind_large, slow_path); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = imalloc(tsd, usize); + p = imalloc(tsd, usize, ind, slow_path); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imalloc_prof(tsd_t *tsd, size_t usize) +imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path) { void *p; prof_tctx_t *tctx; tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = imalloc_prof_sample(tsd, usize, tctx); + p = imalloc_prof_sample(tsd, usize, ind, tctx, slow_path); else - p = imalloc(tsd, usize); + p = imalloc(tsd, usize, ind, slow_path); if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); @@ -1393,23 +1435,45 @@ imalloc_prof(tsd_t *tsd, size_t usize) } JEMALLOC_ALWAYS_INLINE_C void * -imalloc_body(size_t size, tsd_t **tsd, size_t *usize) +imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path) { + szind_t ind; - if (unlikely(malloc_init())) + if (slow_path && unlikely(malloc_init())) return (NULL); *tsd = tsd_fetch(); + ind = size2index(size); + + if (config_stats || + (config_prof && opt_prof) || + (slow_path && config_valgrind && 
unlikely(in_valgrind))) { + *usize = index2size(ind); + } if (config_prof && opt_prof) { - *usize = s2u(size); if (unlikely(*usize == 0)) return (NULL); - return (imalloc_prof(*tsd, *usize)); + return (imalloc_prof(*tsd, *usize, ind, slow_path)); } - if (config_stats || (config_valgrind && unlikely(in_valgrind))) - *usize = s2u(size); - return (imalloc(*tsd, size)); + return (imalloc(*tsd, size, ind, slow_path)); +} + +JEMALLOC_ALWAYS_INLINE_C void +imalloc_post_check(void *ret, tsd_t *tsd, size_t usize, bool slow_path) +{ + if (unlikely(ret == NULL)) { + if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) { + malloc_write("<jemalloc>: Error in malloc(): " + "out of memory\n"); + abort(); + } + set_errno(ENOMEM); + } + if (config_stats && likely(ret != NULL)) { + assert(usize == isalloc(ret, config_prof)); + *tsd_thread_allocatedp_get(tsd) += usize; + } } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1424,21 +1488,20 @@ je_malloc(size_t size) if (size == 0) size = 1; - ret = imalloc_body(size, &tsd, &usize); - if (unlikely(ret == NULL)) { - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write("<jemalloc>: Error in malloc(): " - "out of memory\n"); - abort(); - } - set_errno(ENOMEM); - } - if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(ret, config_prof)); - *tsd_thread_allocatedp_get(tsd) += usize; + if (likely(!malloc_slow)) { + /* + * imalloc_body() is inlined so that fast and slow paths are + * generated separately with statically known slow_path. 
+ */ + ret = imalloc_body(size, &tsd, &usize, false); + imalloc_post_check(ret, tsd, usize, false); + } else { + ret = imalloc_body(size, &tsd, &usize, true); + imalloc_post_check(ret, tsd, usize, true); + UTRACE(0, size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); } - UTRACE(0, size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); + return (ret); } @@ -1576,34 +1639,35 @@ je_aligned_alloc(size_t alignment, size_t size) } static void * -icalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) +icalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, prof_tctx_t *tctx) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = icalloc(tsd, LARGE_MINCLASS); + szind_t ind_large = size2index(LARGE_MINCLASS); + p = icalloc(tsd, LARGE_MINCLASS, ind_large); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = icalloc(tsd, usize); + p = icalloc(tsd, usize, ind); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -icalloc_prof(tsd_t *tsd, size_t usize) +icalloc_prof(tsd_t *tsd, size_t usize, szind_t ind) { void *p; prof_tctx_t *tctx; tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = icalloc_prof_sample(tsd, usize, tctx); + p = icalloc_prof_sample(tsd, usize, ind, tctx); else - p = icalloc(tsd, usize); + p = icalloc(tsd, usize, ind); if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); @@ -1621,6 +1685,7 @@ je_calloc(size_t num, size_t size) void *ret; tsd_t *tsd; size_t num_size; + szind_t ind; size_t usize JEMALLOC_CC_SILENCE_INIT(0); if (unlikely(malloc_init())) { @@ -1650,17 +1715,18 @@ je_calloc(size_t num, size_t size) goto label_return; } + ind = size2index(num_size); if (config_prof && opt_prof) { - usize = s2u(num_size); + usize = index2size(ind); if (unlikely(usize == 0)) { ret = NULL; goto label_return; } - ret = icalloc_prof(tsd, usize); + ret = icalloc_prof(tsd, usize, ind); } 
else { if (config_stats || (config_valgrind && unlikely(in_valgrind))) - usize = s2u(num_size); - ret = icalloc(tsd, num_size); + usize = index2size(ind); + ret = icalloc(tsd, num_size, ind); } label_return: @@ -1725,7 +1791,7 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) } JEMALLOC_INLINE_C void -ifree(tsd_t *tsd, void *ptr, tcache_t *tcache) +ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1740,10 +1806,15 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache) usize = isalloc(ptr, config_prof); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; - if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(ptr); - iqalloc(tsd, ptr, tcache); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); + + if (likely(!slow_path)) + iqalloc(tsd, ptr, tcache, false); + else { + if (config_valgrind && unlikely(in_valgrind)) + rzsize = p2rz(ptr); + iqalloc(tsd, ptr, tcache, true); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); + } } JEMALLOC_INLINE_C void @@ -1780,7 +1851,7 @@ je_realloc(void *ptr, size_t size) /* realloc(ptr, 0) is equivalent to free(ptr). */ UTRACE(ptr, 0, 0); tsd = tsd_fetch(); - ifree(tsd, ptr, tcache_get(tsd, false)); + ifree(tsd, ptr, tcache_get(tsd, false), true); return (NULL); } size = 1; @@ -1807,7 +1878,10 @@ je_realloc(void *ptr, size_t size) } } else { /* realloc(NULL, size) is equivalent to malloc(size). 
*/ - ret = imalloc_body(size, &tsd, &usize); + if (likely(!malloc_slow)) + ret = imalloc_body(size, &tsd, &usize, false); + else + ret = imalloc_body(size, &tsd, &usize, true); } if (unlikely(ret == NULL)) { @@ -1836,7 +1910,10 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); - ifree(tsd, ptr, tcache_get(tsd, false)); + if (likely(!malloc_slow)) + ifree(tsd, ptr, tcache_get(tsd, false), false); + else + ifree(tsd, ptr, tcache_get(tsd, false), true); } } @@ -1965,12 +2042,14 @@ JEMALLOC_ALWAYS_INLINE_C void * imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { + szind_t ind; + ind = size2index(usize); if (unlikely(alignment != 0)) return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); if (unlikely(zero)) - return (icalloct(tsd, usize, tcache, arena)); - return (imalloct(tsd, usize, tcache, arena)); + return (icalloct(tsd, usize, ind, tcache, arena)); + return (imalloct(tsd, usize, ind, tcache, arena)); } static void * @@ -2034,9 +2113,10 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) arena_t *arena; if (likely(flags == 0)) { + szind_t ind = size2index(size); if (config_stats || (config_valgrind && unlikely(in_valgrind))) - *usize = s2u(size); - return (imalloc(tsd, size)); + *usize = index2size(ind); + return (imalloc(tsd, size, ind, true)); } if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize, @@ -2375,7 +2455,7 @@ je_dallocx(void *ptr, int flags) tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - ifree(tsd_fetch(), ptr, tcache); + ifree(tsd_fetch(), ptr, tcache, true); } JEMALLOC_ALWAYS_INLINE_C size_t |