| field | value | date |
|---|---|---|
| author | Qi Wang <interwq@gwu.edu> | 2015-10-27 22:12:10 (GMT) |
| committer | Jason Evans <je@fb.com> | 2015-11-10 22:28:34 (GMT) |
| commit | f4a0f32d340985de477bbe329ecdaecd69ed1055 (patch) | |
| tree | a148610f4d2253186c59e671dcb065ce3647d2f5 /include/jemalloc | |
| parent | 710ca112e31e8621177d08162f60158c27dd2974 (diff) | |
Fast-path improvement: reduce # of branches and unnecessary operations.
- Combine multiple runtime branches into a single malloc_slow check.
- Avoid calling arena_choose / size2index / index2size on fast path.
- A few micro optimizations.
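The first bullet is the core of the change: every option that forces extra work on allocation or deallocation (junk fill, zeroing, quarantine, and so on) is folded into one boolean up front, and the inlined allocation path takes a `slow_path` argument that is effectively a compile-time constant at each call site. A minimal standalone sketch of that pattern follows (not jemalloc's actual code; `my_malloc`, `alloc_impl`, and `slow_flag_init` are made-up names, and only `malloc_slow` and the `opt_*` flags mirror identifiers from the patch):

```c
/* Illustrative sketch of the "single malloc_slow check" idea. */
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

static bool opt_junk_alloc;	/* rare: junk-fill new allocations */
static bool opt_zero;		/* rare: zero new allocations */
static bool opt_quarantine;	/* rare: quarantine frees */
static bool malloc_slow;	/* folded flag, computed once at init */

static void
slow_flag_init(void)
{
	/* Fold all rarely-true option checks into one boolean. */
	malloc_slow = opt_junk_alloc || opt_zero || opt_quarantine;
}

/*
 * slow_path is a constant at every call site below, so the inlined body
 * specializes and the dead branches disappear from the fast path.
 */
static inline void *
alloc_impl(size_t size, bool slow_path)
{
	void *ret = malloc(size);	/* stand-in for the arena/tcache path */

	if (slow_path && ret != NULL) {
		if (opt_junk_alloc)
			memset(ret, 0xa5, size);
		else if (opt_zero)
			memset(ret, 0, size);
	}
	return (ret);
}

void *
my_malloc(size_t size)
{
	/* Fast path: exactly one branch on the folded flag. */
	if (!malloc_slow)
		return (alloc_impl(size, false));
	return (alloc_impl(size, true));
}

int
main(void)
{
	slow_flag_init();		/* e.g. after option parsing */
	void *p = my_malloc(64);

	free(p);
	return (0);
}
```

This is what threading `bool slow_path` through `imalloc`/`iallocztm`/`tcache_alloc_*` accomplishes in the diff below: the fill, zero, and quarantine checks only survive in the specialized slow-path instantiations.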
Diffstat (limited to 'include/jemalloc')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | include/jemalloc/internal/arena.h | 63 |
| -rw-r--r-- | include/jemalloc/internal/jemalloc_internal.h.in | 62 |
| -rw-r--r-- | include/jemalloc/internal/prof.h | 6 |
| -rw-r--r-- | include/jemalloc/internal/tcache.h | 116 |
4 files changed, 150 insertions, 97 deletions
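The second bullet shows up in the diff below as extra `szind_t ind` parameters: the size-class index is computed once near the API boundary and handed down, `arena_choose()` is only called after the thread cache misses, and `index2size()` runs only when profiling, junk/zero fill, or zeroing actually needs the usable size. A self-contained toy version of that calling convention (the four-entry size table, `bin_alloc`, `my_malloc`, and `my_free` are invented for illustration and are not jemalloc's implementation):

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

typedef unsigned szind_t;

#define NSIZES 4
static const size_t index2size_tab[NSIZES] = {8, 16, 32, 64};

static szind_t
size2index(size_t size)
{
	szind_t i;

	for (i = 0; i < NSIZES - 1; i++) {
		if (size <= index2size_tab[i])
			break;
	}
	return (i);	/* no large/huge classes in this toy */
}

static size_t
index2size(szind_t ind)
{
	return (index2size_tab[ind]);
}

/* One trivial free list per size class, standing in for a tcache bin. */
static void *bins[NSIZES];

static void *
bin_alloc(szind_t ind, bool zero)
{
	void *ret = bins[ind];

	if (ret != NULL)
		bins[ind] = *(void **)ret;	/* pop a cached object */
	else
		ret = malloc(index2size(ind));	/* miss: refill path */
	/* The usable size is only materialized when something needs it. */
	if (zero && ret != NULL)
		memset(ret, 0, index2size(ind));
	return (ret);
}

void *
my_malloc(size_t size)
{
	/* Index computed once, up front; callees never call size2index(). */
	szind_t ind = size2index(size);

	return (bin_alloc(ind, false));
}

void
my_free(void *ptr, size_t size)
{
	szind_t ind = size2index(size);

	*(void **)ptr = bins[ind];	/* push onto the per-class list */
	bins[ind] = ptr;
}
```

jemalloc's real `size2index()`/`index2size()` are lookup-table based (see `index2size_tab` in the diff); the linear scan above only keeps the sketch short.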
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 9e2375c..9715ad9 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -461,8 +461,10 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small;
 void	arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
 #endif
 void	arena_quarantine_junk_small(void *ptr, size_t usize);
-void	*arena_malloc_small(arena_t *arena, size_t size, bool zero);
-void	*arena_malloc_large(arena_t *arena, size_t size, bool zero);
+void	*arena_malloc_small(arena_t *arena, size_t size, szind_t ind,
+    bool zero);
+void	*arena_malloc_large(arena_t *arena, size_t size, szind_t ind,
+    bool zero);
 void	*arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize,
     size_t alignment, bool zero, tcache_t *tcache);
 void	arena_prof_promoted(const void *ptr, size_t size);
@@ -558,11 +560,11 @@ prof_tctx_t	*arena_prof_tctx_get(const void *ptr);
 void	arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
 void	arena_prof_tctx_reset(const void *ptr, size_t usize,
     const void *old_ptr, prof_tctx_t *old_tctx);
-void	*arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
-    tcache_t *tcache);
+void	*arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
+    bool zero, tcache_t *tcache, bool slow_path);
 arena_t	*arena_aalloc(const void *ptr);
 size_t	arena_salloc(const void *ptr, bool demote);
-void	arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
+void	arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path);
 void	arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
 #endif
 
@@ -1158,34 +1160,34 @@ arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
-    tcache_t *tcache)
+arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
+    bool zero, tcache_t *tcache, bool slow_path)
 {
 
 	assert(size != 0);
 
+	if (likely(tcache != NULL)) {
+		if (likely(size <= SMALL_MAXCLASS)) {
+			return (tcache_alloc_small(tsd, arena, tcache, size,
+			    ind, zero, slow_path));
+		}
+		if (likely(size <= tcache_maxclass)) {
+			return (tcache_alloc_large(tsd, arena, tcache, size,
+			    ind, zero, slow_path));
+		}
+		/* (size > tcache_maxclass) case falls through. */
+		assert(size > tcache_maxclass);
+	}
+
 	arena = arena_choose(tsd, arena);
 	if (unlikely(arena == NULL))
 		return (NULL);
 
-	if (likely(size <= SMALL_MAXCLASS)) {
-		if (likely(tcache != NULL)) {
-			return (tcache_alloc_small(tsd, arena, tcache, size,
-			    zero));
-		} else
-			return (arena_malloc_small(arena, size, zero));
-	} else if (likely(size <= large_maxclass)) {
-		/*
-		 * Initialize tcache after checking size in order to avoid
-		 * infinite recursion during tcache initialization.
-		 */
-		if (likely(tcache != NULL) && size <= tcache_maxclass) {
-			return (tcache_alloc_large(tsd, arena, tcache, size,
-			    zero));
-		} else
-			return (arena_malloc_large(arena, size, zero));
-	} else
-		return (huge_malloc(tsd, arena, size, zero, tcache));
+	if (likely(size <= SMALL_MAXCLASS))
+		return (arena_malloc_small(arena, size, ind, zero));
+	if (likely(size <= large_maxclass))
+		return (arena_malloc_large(arena, size, ind, zero));
+	return (huge_malloc(tsd, arena, size, zero, tcache));
 }
 
 JEMALLOC_ALWAYS_INLINE arena_t *
@@ -1251,7 +1253,7 @@ arena_salloc(const void *ptr, bool demote)
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
+arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
 {
 	arena_chunk_t *chunk;
 	size_t pageind, mapbits;
@@ -1268,7 +1270,8 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
 		if (likely(tcache != NULL)) {
 			szind_t binind = arena_ptr_small_binind_get(ptr,
 			    mapbits);
-			tcache_dalloc_small(tsd, tcache, ptr, binind);
+			tcache_dalloc_small(tsd, tcache, ptr, binind,
+			    slow_path);
 		} else {
 			arena_dalloc_small(extent_node_arena_get(
 			    &chunk->node), chunk, ptr, pageind);
@@ -1283,7 +1286,7 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
 		if (likely(tcache != NULL) && size - large_pad <=
 		    tcache_maxclass) {
 			tcache_dalloc_large(tsd, tcache, ptr, size -
-			    large_pad);
+			    large_pad, slow_path);
 		} else {
 			arena_dalloc_large(extent_node_arena_get(
 			    &chunk->node), chunk, ptr);
@@ -1319,7 +1322,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
 		/* Small allocation. */
 		if (likely(tcache != NULL)) {
 			szind_t binind = size2index(size);
-			tcache_dalloc_small(tsd, tcache, ptr, binind);
+			tcache_dalloc_small(tsd, tcache, ptr, binind, true);
 		} else {
 			size_t pageind = ((uintptr_t)ptr -
 			    (uintptr_t)chunk) >> LG_PAGE;
@@ -1331,7 +1334,7 @@
 		    PAGE_MASK) == 0);
 
 		if (likely(tcache != NULL) && size <= tcache_maxclass)
-			tcache_dalloc_large(tsd, tcache, ptr, size);
+			tcache_dalloc_large(tsd, tcache, ptr, size, true);
 		else {
 			arena_dalloc_large(extent_node_arena_get(
 			    &chunk->node), chunk, ptr);
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 654cd08..d31da4c 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -437,7 +437,7 @@ extern unsigned	ncpus;
  * index2size_tab encodes the same information as could be computed (at
  * unacceptable cost in some code paths) by index2size_compute().
  */
-extern size_t const	index2size_tab[NSIZES];
+extern size_t const	index2size_tab[NSIZES+1];
 /*
  * size2index_tab is a compact lookup table that rounds request sizes up to
  * size classes.  In order to reduce cache footprint, the table is compressed,
@@ -624,7 +624,7 @@ JEMALLOC_ALWAYS_INLINE size_t
 index2size(szind_t index)
 {
 
-	assert(index < NSIZES);
+	assert(index <= NSIZES);
 	return (index2size_lookup(index));
 }
 
@@ -823,12 +823,14 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing,
 #ifndef JEMALLOC_ENABLE_INLINE
 arena_t	*iaalloc(const void *ptr);
 size_t	isalloc(const void *ptr, bool demote);
-void	*iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache,
-    bool is_metadata, arena_t *arena);
-void	*imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena);
-void	*imalloc(tsd_t *tsd, size_t size);
-void	*icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena);
-void	*icalloc(tsd_t *tsd, size_t size);
+void	*iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero,
+    tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path);
+void	*imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache,
+    arena_t *arena);
+void	*imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path);
+void	*icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache,
+    arena_t *arena);
+void	*icalloc(tsd_t *tsd, size_t size, szind_t ind);
 void	*ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, bool is_metadata, arena_t *arena);
 void	*ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
@@ -837,10 +839,11 @@ void	*ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero);
 size_t	ivsalloc(const void *ptr, bool demote);
 size_t	u2rz(size_t usize);
 size_t	p2rz(const void *ptr);
-void	idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata);
+void	idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata,
+    bool slow_path);
 void	idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache);
 void	idalloc(tsd_t *tsd, void *ptr);
-void	iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
+void	iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path);
 void	isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
 void	isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
 void	*iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
@@ -881,14 +884,14 @@ isalloc(const void *ptr, bool demote)
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata,
-    arena_t *arena)
+iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache,
+    bool is_metadata, arena_t *arena, bool slow_path)
 {
 	void *ret;
 
 	assert(size != 0);
 
-	ret = arena_malloc(tsd, arena, size, zero, tcache);
+	ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path);
 	if (config_stats && is_metadata && likely(ret != NULL)) {
 		arena_metadata_allocated_add(iaalloc(ret), isalloc(ret,
 		    config_prof));
@@ -897,31 +900,33 @@ iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena)
+imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena)
 {
 
-	return (iallocztm(tsd, size, false, tcache, false, arena));
+	return (iallocztm(tsd, size, ind, false, tcache, false, arena, true));
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-imalloc(tsd_t *tsd, size_t size)
+imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path)
 {
 
-	return (iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL));
+	return (iallocztm(tsd, size, ind, false, tcache_get(tsd, true), false,
+	    NULL, slow_path));
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena)
+icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena)
 {
 
-	return (iallocztm(tsd, size, true, tcache, false, arena));
+	return (iallocztm(tsd, size, ind, true, tcache, false, arena, true));
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-icalloc(tsd_t *tsd, size_t size)
+icalloc(tsd_t *tsd, size_t size, szind_t ind)
 {
 
-	return (iallocztm(tsd, size, true, tcache_get(tsd, true), false, NULL));
+	return (iallocztm(tsd, size, ind, true, tcache_get(tsd, true), false,
+	    NULL, true));
 }
 
 JEMALLOC_ALWAYS_INLINE void *
@@ -997,7 +1002,8 @@ p2rz(const void *ptr)
 }
 
 JEMALLOC_ALWAYS_INLINE void
-idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata)
+idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata,
+    bool slow_path)
 {
 
 	assert(ptr != NULL);
@@ -1006,31 +1012,31 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata)
 		    config_prof));
 	}
 
-	arena_dalloc(tsd, ptr, tcache);
+	arena_dalloc(tsd, ptr, tcache, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache)
 {
 
-	idalloctm(tsd, ptr, tcache, false);
+	idalloctm(tsd, ptr, tcache, false, true);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 idalloc(tsd_t *tsd, void *ptr)
 {
 
-	idalloctm(tsd, ptr, tcache_get(tsd, false), false);
+	idalloctm(tsd, ptr, tcache_get(tsd, false), false, true);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
+iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
 {
 
-	if (config_fill && unlikely(opt_quarantine))
+	if (slow_path && config_fill && unlikely(opt_quarantine))
 		quarantine(tsd, ptr);
 	else
-		idalloctm(tsd, ptr, tcache, false);
+		idalloctm(tsd, ptr, tcache, false, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index e5198c3..a25502a 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -436,16 +436,16 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
 	cassert(config_prof);
 
 	tdata = prof_tdata_get(tsd, true);
-	if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+	if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX))
 		tdata = NULL;
 
 	if (tdata_out != NULL)
 		*tdata_out = tdata;
 
-	if (tdata == NULL)
+	if (unlikely(tdata == NULL))
 		return (true);
 
-	if (tdata->bytes_until_sample >= usize) {
+	if (likely(tdata->bytes_until_sample >= usize)) {
 		if (update)
 			tdata->bytes_until_sample -= usize;
 		return (true);
diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h
index 5079cd2..c292140 100644
--- a/include/jemalloc/internal/tcache.h
+++ b/include/jemalloc/internal/tcache.h
@@ -70,6 +70,13 @@ struct tcache_bin_s {
 	int		low_water;	/* Min # cached since last GC. */
 	unsigned	lg_fill_div;	/* Fill (ncached_max >> lg_fill_div). */
 	unsigned	ncached;	/* # of cached objects. */
+	/*
+	 * To make use of adjacent cacheline prefetch, the items in the avail
+	 * stack goes to higher address for newer allocations.  avail points
+	 * just above the available space, which means that
+	 * avail[-ncached, ... -1] are available items and the lowest item will
+	 * be allocated first.
+	 */
 	void		**avail;	/* Stack of available objects. */
 };
 
@@ -126,7 +133,7 @@ extern tcaches_t	*tcaches;
 size_t	tcache_salloc(const void *ptr);
 void	tcache_event_hard(tsd_t *tsd, tcache_t *tcache);
 void	*tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
-    tcache_bin_t *tbin, szind_t binind);
+    tcache_bin_t *tbin, szind_t binind, bool *tcache_success);
 void	tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
     szind_t binind, unsigned rem);
 void	tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
@@ -155,15 +162,15 @@ void	tcache_flush(void);
 bool	tcache_enabled_get(void);
 tcache_t *tcache_get(tsd_t *tsd, bool create);
 void	tcache_enabled_set(bool enabled);
-void	*tcache_alloc_easy(tcache_bin_t *tbin);
+void	*tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success);
 void	*tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
-    size_t size, bool zero);
+    size_t size, szind_t ind, bool zero, bool slow_path);
 void	*tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
-    size_t size, bool zero);
+    size_t size, szind_t ind, bool zero, bool slow_path);
 void	tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr,
-    szind_t binind);
+    szind_t binind, bool slow_path);
 void	tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr,
-    size_t size);
+    size_t size, bool slow_path);
 tcache_t	*tcaches_get(tsd_t *tsd, unsigned ind);
 #endif
 
@@ -247,44 +254,69 @@ tcache_event(tsd_t *tsd, tcache_t *tcache)
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-tcache_alloc_easy(tcache_bin_t *tbin)
+tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success)
 {
 	void *ret;
 
 	if (unlikely(tbin->ncached == 0)) {
 		tbin->low_water = -1;
+		*tcache_success = false;
 		return (NULL);
 	}
+	/*
+	 * tcache_success (instead of ret) should be checked upon the return of
+	 * this function.  We avoid checking (ret == NULL) because there is
+	 * never a null stored on the avail stack (which is unknown to the
+	 * compiler), and eagerly checking ret would cause pipeline stall
+	 * (waiting for the cacheline).
+	 */
+	*tcache_success = true;
+	ret = *(tbin->avail - tbin->ncached);
 	tbin->ncached--;
+
 	if (unlikely((int)tbin->ncached < tbin->low_water))
 		tbin->low_water = tbin->ncached;
-	ret = tbin->avail[tbin->ncached];
+
 	return (ret);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
 tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
-    bool zero)
+    szind_t binind, bool zero, bool slow_path)
 {
 	void *ret;
-	szind_t binind;
-	size_t usize;
 	tcache_bin_t *tbin;
+	bool tcache_success;
+	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 
-	binind = size2index(size);
 	assert(binind < NBINS);
 	tbin = &tcache->tbins[binind];
-	usize = index2size(binind);
-	ret = tcache_alloc_easy(tbin);
-	if (unlikely(ret == NULL)) {
-		ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind);
-		if (ret == NULL)
+	ret = tcache_alloc_easy(tbin, &tcache_success);
+	assert(tcache_success == (ret != NULL));
+	if (unlikely(!tcache_success)) {
+		bool tcache_hard_success;
+		arena = arena_choose(tsd, arena);
+		if (unlikely(arena == NULL))
+			return (NULL);
+
+		ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind,
+		    &tcache_hard_success);
+		if (tcache_hard_success == false)
 			return (NULL);
 	}
-	assert(tcache_salloc(ret) == usize);
+
+	assert(ret);
+	/*
+	 * Only compute usize if required.  The checks in the following if
+	 * statement are all static.
+	 */
+	if (config_prof || (slow_path && config_fill) || unlikely(zero)) {
+		usize = index2size(binind);
+		assert(tcache_salloc(ret) == usize);
+	}
 
 	if (likely(!zero)) {
-		if (config_fill) {
+		if (slow_path && config_fill) {
 			if (unlikely(opt_junk_alloc)) {
 				arena_alloc_junk_small(ret,
 				    &arena_bin_info[binind], false);
@@ -292,7 +324,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
 			memset(ret, 0, usize);
 		}
 	} else {
-		if (config_fill && unlikely(opt_junk_alloc)) {
+		if (slow_path && config_fill && unlikely(opt_junk_alloc)) {
 			arena_alloc_junk_small(ret, &arena_bin_info[binind],
 			    true);
 		}
@@ -309,28 +341,38 @@
 JEMALLOC_ALWAYS_INLINE void *
 tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
-    bool zero)
+    szind_t binind, bool zero, bool slow_path)
 {
 	void *ret;
-	szind_t binind;
-	size_t usize;
 	tcache_bin_t *tbin;
+	bool tcache_success;
+	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 
-	binind = size2index(size);
-	usize = index2size(binind);
-	assert(usize <= tcache_maxclass);
 	assert(binind < nhbins);
 	tbin = &tcache->tbins[binind];
-	ret = tcache_alloc_easy(tbin);
-	if (unlikely(ret == NULL)) {
+	ret = tcache_alloc_easy(tbin, &tcache_success);
+	assert(tcache_success == (ret != NULL));
+	if (unlikely(!tcache_success)) {
 		/*
 		 * Only allocate one large object at a time, because it's quite
 		 * expensive to create one and not use it.
 		 */
-		ret = arena_malloc_large(arena, usize, zero);
+		arena = arena_choose(tsd, arena);
+		if (unlikely(arena == NULL))
+			return (NULL);
+
+		usize = index2size(binind);
+		assert(usize <= tcache_maxclass);
+		ret = arena_malloc_large(arena, usize, binind, zero);
 		if (ret == NULL)
 			return (NULL);
 	} else {
+		/* Only compute usize on demand */
+		if (config_prof || (slow_path && config_fill) || unlikely(zero)) {
+			usize = index2size(binind);
+			assert(usize <= tcache_maxclass);
+		}
+
 		if (config_prof && usize == LARGE_MINCLASS) {
 			arena_chunk_t *chunk =
 			    (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
@@ -340,7 +382,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
 			    BININD_INVALID);
 		}
 		if (likely(!zero)) {
-			if (config_fill) {
+			if (slow_path && config_fill) {
 				if (unlikely(opt_junk_alloc))
 					memset(ret, 0xa5, usize);
 				else if (unlikely(opt_zero))
@@ -360,14 +402,15 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
 }
 
 JEMALLOC_ALWAYS_INLINE void
-tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind)
+tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
+    bool slow_path)
 {
 	tcache_bin_t *tbin;
 	tcache_bin_info_t *tbin_info;
 
 	assert(tcache_salloc(ptr) <= SMALL_MAXCLASS);
 
-	if (config_fill && unlikely(opt_junk_free))
+	if (slow_path && config_fill && unlikely(opt_junk_free))
 		arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
 
 	tbin = &tcache->tbins[binind];
@@ -377,14 +420,15 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind)
 		    (tbin_info->ncached_max >> 1));
 	}
 	assert(tbin->ncached < tbin_info->ncached_max);
-	tbin->avail[tbin->ncached] = ptr;
 	tbin->ncached++;
+	*(tbin->avail - tbin->ncached) = ptr;
 
 	tcache_event(tsd, tcache);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size)
+tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size,
+    bool slow_path)
 {
 	szind_t binind;
 	tcache_bin_t *tbin;
@@ -396,7 +440,7 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size)
 
 	binind = size2index(size);
 
-	if (config_fill && unlikely(opt_junk_free))
+	if (slow_path && config_fill && unlikely(opt_junk_free))
 		arena_dalloc_junk_large(ptr, size);
 
 	tbin = &tcache->tbins[binind];
@@ -406,8 +450,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size)
 		    (tbin_info->ncached_max >> 1), tcache);
 	}
 	assert(tbin->ncached < tbin_info->ncached_max);
-	tbin->avail[tbin->ncached] = ptr;
 	tbin->ncached++;
+	*(tbin->avail - tbin->ncached) = ptr;
 
 	tcache_event(tsd, tcache);
 }
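The tcache.h hunks above also flip how a bin's `avail` array is indexed: cached pointers now live at `avail[-ncached .. -1]`, a free pushes at `*(avail - ncached)` after bumping `ncached`, and an allocation pops the lowest-addressed (most recently pushed) slot, so a burst of allocations walks toward higher addresses and cooperates with adjacent-cacheline prefetch; the `tcache_success` out-parameter likewise lets callers test a flag instead of stalling on the just-loaded pointer. A toy model of that stack discipline (all names here are invented; only the indexing scheme mirrors the patch):

```c
#include <stdbool.h>
#include <stddef.h>

#define NCACHED_MAX 8

typedef struct {
	unsigned ncached;
	void **avail;	/* points just above the cached items */
} toy_bin_t;

static void *slots[NCACHED_MAX];
static toy_bin_t bin = {0, slots + NCACHED_MAX};

static bool
toy_push(void *ptr)	/* deallocation path */
{
	if (bin.ncached == NCACHED_MAX)
		return (false);		/* a real bin would flush here */
	bin.ncached++;
	*(bin.avail - bin.ncached) = ptr;	/* newest item: lowest address */
	return (true);
}

static void *
toy_pop(bool *success)	/* allocation path */
{
	void *ret;

	if (bin.ncached == 0) {
		*success = false;	/* caller tests the flag, not ret */
		return (NULL);
	}
	*success = true;
	/*
	 * The lowest-addressed (most recently pushed) item goes out first, so
	 * a run of allocations reads ascending addresses.
	 */
	ret = *(bin.avail - bin.ncached);
	bin.ncached--;
	return (ret);
}

int
main(void)
{
	int a, b;
	bool ok;

	toy_push(&a);
	toy_push(&b);
	/* LIFO order: &b comes back first, then &a. */
	return (toy_pop(&ok) == &b && toy_pop(&ok) == &a ? 0 : 1);
}
```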

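The prof.h hunk is one of the "micro optimizations": it only wraps the sampling checks in `likely()`/`unlikely()` branch hints. jemalloc defines those macros itself in its internal headers; the usual GCC/Clang-style definition they correspond to (shown here for reference only, not copied from the tree) looks like this:

```c
/* Typical branch-prediction hint macros; illustrative definition. */
#ifdef __GNUC__
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)
#else
#define likely(x)	(x)
#define unlikely(x)	(x)
#endif
```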