author     Qi Wang <interwq@gwu.edu>    2015-10-27 22:12:10 (GMT)
committer  Jason Evans <je@fb.com>      2015-11-10 22:28:34 (GMT)
commit     f4a0f32d340985de477bbe329ecdaecd69ed1055 (patch)
tree       a148610f4d2253186c59e671dcb065ce3647d2f5 /include/jemalloc
parent     710ca112e31e8621177d08162f60158c27dd2974 (diff)
Fast-path improvement: reduce # of branches and unnecessary operations.
- Combine multiple runtime branches into a single malloc_slow check.
- Avoid calling arena_choose / size2index / index2size on fast path.
- A few micro optimizations.
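A minimal sketch of the malloc_slow idea (hypothetical names, not jemalloc's actual code): independently checked runtime options are folded at startup into one boolean, so the common case pays for a single well-predicted branch.

    #include <stdbool.h>
    #include <stdlib.h>

    #define likely(x) __builtin_expect(!!(x), 1)

    /* Hypothetical runtime options; jemalloc reads its own from MALLOC_CONF. */
    static bool opt_junk_alloc, opt_zero, opt_quarantine;
    static bool malloc_slow;    /* folded flag, computed once at startup */

    static void
    malloc_slow_init(void)
    {
        /* Any feature needing extra per-call work forces the slow path. */
        malloc_slow = opt_junk_alloc || opt_zero || opt_quarantine;
    }

    void *
    my_malloc(size_t size)
    {
        if (likely(!malloc_slow))
            return (malloc(size));  /* stand-in for the real fast path */
        /* Slow path: junk filling, quarantine, stats, etc. */
        return (malloc(size));
    }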
Diffstat (limited to 'include/jemalloc')
-rw-r--r--  include/jemalloc/internal/arena.h                 |  63
-rw-r--r--  include/jemalloc/internal/jemalloc_internal.h.in  |  62
-rw-r--r--  include/jemalloc/internal/prof.h                  |   6
-rw-r--r--  include/jemalloc/internal/tcache.h                | 116
4 files changed, 150 insertions(+), 97 deletions(-)
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 9e2375c..9715ad9 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -461,8 +461,10 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small;
void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
#endif
void arena_quarantine_junk_small(void *ptr, size_t usize);
-void *arena_malloc_small(arena_t *arena, size_t size, bool zero);
-void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
+void *arena_malloc_small(arena_t *arena, size_t size, szind_t ind,
+ bool zero);
+void *arena_malloc_large(arena_t *arena, size_t size, szind_t ind,
+ bool zero);
void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize,
size_t alignment, bool zero, tcache_t *tcache);
void arena_prof_promoted(const void *ptr, size_t size);
@@ -558,11 +560,11 @@ prof_tctx_t *arena_prof_tctx_get(const void *ptr);
void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
void arena_prof_tctx_reset(const void *ptr, size_t usize,
const void *old_ptr, prof_tctx_t *old_tctx);
-void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
- tcache_t *tcache);
+void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
+ bool zero, tcache_t *tcache, bool slow_path);
arena_t *arena_aalloc(const void *ptr);
size_t arena_salloc(const void *ptr, bool demote);
-void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
+void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path);
void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
#endif
@@ -1158,34 +1160,34 @@ arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
}
JEMALLOC_ALWAYS_INLINE void *
-arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
- tcache_t *tcache)
+arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
+ bool zero, tcache_t *tcache, bool slow_path)
{
assert(size != 0);
+ if (likely(tcache != NULL)) {
+ if (likely(size <= SMALL_MAXCLASS)) {
+ return (tcache_alloc_small(tsd, arena, tcache, size,
+ ind, zero, slow_path));
+ }
+ if (likely(size <= tcache_maxclass)) {
+ return (tcache_alloc_large(tsd, arena, tcache, size,
+ ind, zero, slow_path));
+ }
+ /* (size > tcache_maxclass) case falls through. */
+ assert(size > tcache_maxclass);
+ }
+
arena = arena_choose(tsd, arena);
if (unlikely(arena == NULL))
return (NULL);
- if (likely(size <= SMALL_MAXCLASS)) {
- if (likely(tcache != NULL)) {
- return (tcache_alloc_small(tsd, arena, tcache, size,
- zero));
- } else
- return (arena_malloc_small(arena, size, zero));
- } else if (likely(size <= large_maxclass)) {
- /*
- * Initialize tcache after checking size in order to avoid
- * infinite recursion during tcache initialization.
- */
- if (likely(tcache != NULL) && size <= tcache_maxclass) {
- return (tcache_alloc_large(tsd, arena, tcache, size,
- zero));
- } else
- return (arena_malloc_large(arena, size, zero));
- } else
- return (huge_malloc(tsd, arena, size, zero, tcache));
+ if (likely(size <= SMALL_MAXCLASS))
+ return (arena_malloc_small(arena, size, ind, zero));
+ if (likely(size <= large_maxclass))
+ return (arena_malloc_large(arena, size, ind, zero));
+ return (huge_malloc(tsd, arena, size, zero, tcache));
}
JEMALLOC_ALWAYS_INLINE arena_t *
@@ -1251,7 +1253,7 @@ arena_salloc(const void *ptr, bool demote)
}
JEMALLOC_ALWAYS_INLINE void
-arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
+arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
{
arena_chunk_t *chunk;
size_t pageind, mapbits;
@@ -1268,7 +1270,8 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
if (likely(tcache != NULL)) {
szind_t binind = arena_ptr_small_binind_get(ptr,
mapbits);
- tcache_dalloc_small(tsd, tcache, ptr, binind);
+ tcache_dalloc_small(tsd, tcache, ptr, binind,
+ slow_path);
} else {
arena_dalloc_small(extent_node_arena_get(
&chunk->node), chunk, ptr, pageind);
@@ -1283,7 +1286,7 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
if (likely(tcache != NULL) && size - large_pad <=
tcache_maxclass) {
tcache_dalloc_large(tsd, tcache, ptr, size -
- large_pad);
+ large_pad, slow_path);
} else {
arena_dalloc_large(extent_node_arena_get(
&chunk->node), chunk, ptr);
@@ -1319,7 +1322,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
/* Small allocation. */
if (likely(tcache != NULL)) {
szind_t binind = size2index(size);
- tcache_dalloc_small(tsd, tcache, ptr, binind);
+ tcache_dalloc_small(tsd, tcache, ptr, binind, true);
} else {
size_t pageind = ((uintptr_t)ptr -
(uintptr_t)chunk) >> LG_PAGE;
@@ -1331,7 +1334,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
PAGE_MASK) == 0);
if (likely(tcache != NULL) && size <= tcache_maxclass)
- tcache_dalloc_large(tsd, tcache, ptr, size);
+ tcache_dalloc_large(tsd, tcache, ptr, size, true);
else {
arena_dalloc_large(extent_node_arena_get(
&chunk->node), chunk, ptr);
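The arena.h hunks above consult the tcache before calling arena_choose and thread a precomputed szind_t through the call chain, so the size class is never recomputed on the fast path. A hedged sketch of the compute-once pattern, with a toy size-class mapping and hypothetical names:

    #include <stddef.h>

    typedef unsigned szind_t;

    /* Toy 8-byte size classing; jemalloc's size2index/index2size are
     * table lookups, and repeating them at every level costs cycles. */
    static szind_t
    size2index(size_t size)
    {
        return ((szind_t)((size + 7) / 8));
    }

    static void *
    alloc_inner(size_t size, szind_t ind)
    {
        /* Uses ind directly; no second size2index(size) call here. */
        (void)size;
        (void)ind;
        return (NULL);
    }

    void *
    alloc_entry(size_t size)
    {
        szind_t ind = size2index(size); /* computed exactly once */

        return (alloc_inner(size, ind));
    }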
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 654cd08..d31da4c 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -437,7 +437,7 @@ extern unsigned ncpus;
* index2size_tab encodes the same information as could be computed (at
* unacceptable cost in some code paths) by index2size_compute().
*/
-extern size_t const index2size_tab[NSIZES];
+extern size_t const index2size_tab[NSIZES+1];
/*
* size2index_tab is a compact lookup table that rounds request sizes up to
* size classes. In order to reduce cache footprint, the table is compressed,
@@ -624,7 +624,7 @@ JEMALLOC_ALWAYS_INLINE size_t
index2size(szind_t index)
{
- assert(index < NSIZES);
+ assert(index <= NSIZES);
return (index2size_lookup(index));
}
@@ -823,12 +823,14 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing,
#ifndef JEMALLOC_ENABLE_INLINE
arena_t *iaalloc(const void *ptr);
size_t isalloc(const void *ptr, bool demote);
-void *iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache,
- bool is_metadata, arena_t *arena);
-void *imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena);
-void *imalloc(tsd_t *tsd, size_t size);
-void *icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena);
-void *icalloc(tsd_t *tsd, size_t size);
+void *iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero,
+ tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path);
+void *imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache,
+ arena_t *arena);
+void *imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path);
+void *icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache,
+ arena_t *arena);
+void *icalloc(tsd_t *tsd, size_t size, szind_t ind);
void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
tcache_t *tcache, bool is_metadata, arena_t *arena);
void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
@@ -837,10 +839,11 @@ void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero);
size_t ivsalloc(const void *ptr, bool demote);
size_t u2rz(size_t usize);
size_t p2rz(const void *ptr);
-void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata);
+void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata,
+ bool slow_path);
void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache);
void idalloc(tsd_t *tsd, void *ptr);
-void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
+void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path);
void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
@@ -881,14 +884,14 @@ isalloc(const void *ptr, bool demote)
}
JEMALLOC_ALWAYS_INLINE void *
-iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata,
- arena_t *arena)
+iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache,
+ bool is_metadata, arena_t *arena, bool slow_path)
{
void *ret;
assert(size != 0);
- ret = arena_malloc(tsd, arena, size, zero, tcache);
+ ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path);
if (config_stats && is_metadata && likely(ret != NULL)) {
arena_metadata_allocated_add(iaalloc(ret), isalloc(ret,
config_prof));
@@ -897,31 +900,33 @@ iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata
}
JEMALLOC_ALWAYS_INLINE void *
-imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena)
+imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena)
{
- return (iallocztm(tsd, size, false, tcache, false, arena));
+ return (iallocztm(tsd, size, ind, false, tcache, false, arena, true));
}
JEMALLOC_ALWAYS_INLINE void *
-imalloc(tsd_t *tsd, size_t size)
+imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path)
{
- return (iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL));
+ return (iallocztm(tsd, size, ind, false, tcache_get(tsd, true), false,
+ NULL, slow_path));
}
JEMALLOC_ALWAYS_INLINE void *
-icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena)
+icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena)
{
- return (iallocztm(tsd, size, true, tcache, false, arena));
+ return (iallocztm(tsd, size, ind, true, tcache, false, arena, true));
}
JEMALLOC_ALWAYS_INLINE void *
-icalloc(tsd_t *tsd, size_t size)
+icalloc(tsd_t *tsd, size_t size, szind_t ind)
{
- return (iallocztm(tsd, size, true, tcache_get(tsd, true), false, NULL));
+ return (iallocztm(tsd, size, ind, true, tcache_get(tsd, true), false,
+ NULL, true));
}
JEMALLOC_ALWAYS_INLINE void *
@@ -997,7 +1002,8 @@ p2rz(const void *ptr)
}
JEMALLOC_ALWAYS_INLINE void
-idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata)
+idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata,
+ bool slow_path)
{
assert(ptr != NULL);
@@ -1006,31 +1012,31 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata)
config_prof));
}
- arena_dalloc(tsd, ptr, tcache);
+ arena_dalloc(tsd, ptr, tcache, slow_path);
}
JEMALLOC_ALWAYS_INLINE void
idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache)
{
- idalloctm(tsd, ptr, tcache, false);
+ idalloctm(tsd, ptr, tcache, false, true);
}
JEMALLOC_ALWAYS_INLINE void
idalloc(tsd_t *tsd, void *ptr)
{
- idalloctm(tsd, ptr, tcache_get(tsd, false), false);
+ idalloctm(tsd, ptr, tcache_get(tsd, false), false, true);
}
JEMALLOC_ALWAYS_INLINE void
-iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
+iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
{
- if (config_fill && unlikely(opt_quarantine))
+ if (slow_path && config_fill && unlikely(opt_quarantine))
quarantine(tsd, ptr);
else
- idalloctm(tsd, ptr, tcache, false);
+ idalloctm(tsd, ptr, tcache, false, slow_path);
}
JEMALLOC_ALWAYS_INLINE void
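The slow_path flag added above works because these helpers are JEMALLOC_ALWAYS_INLINE: call sites pass a compile-time constant, and after inlining, constant propagation deletes the guarded branches (such as the opt_quarantine test in iqalloc) from the fast path. A minimal illustration with a hypothetical option:

    #include <stdbool.h>
    #include <stdio.h>

    static bool opt_quarantine;     /* hypothetical runtime option */

    static inline void
    dalloc(void *ptr, bool slow_path)
    {
        /*
         * When the call site passes slow_path == false, inlining makes
         * this branch dead code; the option load and test vanish.
         */
        if (slow_path && opt_quarantine) {
            printf("quarantine %p\n", ptr);
            return;
        }
        /* Fast free path. */
    }

    int
    main(void)
    {
        int x;

        dalloc(&x, false);      /* fast path: branch compiled out */
        dalloc(&x, true);       /* slow path: option actually tested */
        return (0);
    }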
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index e5198c3..a25502a 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -436,16 +436,16 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
cassert(config_prof);
tdata = prof_tdata_get(tsd, true);
- if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX))
tdata = NULL;
if (tdata_out != NULL)
*tdata_out = tdata;
- if (tdata == NULL)
+ if (unlikely(tdata == NULL))
return (true);
- if (tdata->bytes_until_sample >= usize) {
+ if (likely(tdata->bytes_until_sample >= usize)) {
if (update)
tdata->bytes_until_sample -= usize;
return (true);
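The prof.h hunk changes no logic; it only annotates branch direction so the compiler keeps the sampling fast path straight-line. On GCC/Clang, likely/unlikely conventionally expand to __builtin_expect (jemalloc defines similar wrappers in its internal headers); a sketch of the usual definitions:

    /* Conventional definitions; a sketch, not copied from jemalloc. */
    #ifdef __GNUC__
    #  define likely(x)   __builtin_expect(!!(x), 1)
    #  define unlikely(x) __builtin_expect(!!(x), 0)
    #else
    #  define likely(x)   (x)
    #  define unlikely(x) (x)
    #endif

The hint affects code layout only, never semantics: marking (tdata == NULL) as unlikely moves that early return off the hot path.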
diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h
index 5079cd2..c292140 100644
--- a/include/jemalloc/internal/tcache.h
+++ b/include/jemalloc/internal/tcache.h
@@ -70,6 +70,13 @@ struct tcache_bin_s {
int low_water; /* Min # cached since last GC. */
unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */
unsigned ncached; /* # of cached objects. */
+ /*
+ * To make use of adjacent cacheline prefetch, the items in the avail
+ * stack go to higher addresses for newer allocations. avail points
+ * just above the available space, which means that
+ * avail[-ncached, ... -1] are available items and the lowest item
+ * will be allocated first.
+ */
void **avail; /* Stack of available objects. */
};
@@ -126,7 +133,7 @@ extern tcaches_t *tcaches;
size_t tcache_salloc(const void *ptr);
void tcache_event_hard(tsd_t *tsd, tcache_t *tcache);
void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
- tcache_bin_t *tbin, szind_t binind);
+ tcache_bin_t *tbin, szind_t binind, bool *tcache_success);
void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
szind_t binind, unsigned rem);
void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
@@ -155,15 +162,15 @@ void tcache_flush(void);
bool tcache_enabled_get(void);
tcache_t *tcache_get(tsd_t *tsd, bool create);
void tcache_enabled_set(bool enabled);
-void *tcache_alloc_easy(tcache_bin_t *tbin);
+void *tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success);
void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
- size_t size, bool zero);
+ size_t size, szind_t ind, bool zero, bool slow_path);
void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
- size_t size, bool zero);
+ size_t size, szind_t ind, bool zero, bool slow_path);
void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr,
- szind_t binind);
+ szind_t binind, bool slow_path);
void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr,
- size_t size);
+ size_t size, bool slow_path);
tcache_t *tcaches_get(tsd_t *tsd, unsigned ind);
#endif
@@ -247,44 +254,69 @@ tcache_event(tsd_t *tsd, tcache_t *tcache)
}
JEMALLOC_ALWAYS_INLINE void *
-tcache_alloc_easy(tcache_bin_t *tbin)
+tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success)
{
void *ret;
if (unlikely(tbin->ncached == 0)) {
tbin->low_water = -1;
+ *tcache_success = false;
return (NULL);
}
+ /*
+ * tcache_success (instead of ret) should be checked upon the return of
+ * this function. We avoid checking (ret == NULL) because there is
+ * never a null stored on the avail stack (which is unknown to the
+ * compiler), and eagerly checking ret would cause a pipeline stall
+ * (waiting for the cacheline).
+ */
+ *tcache_success = true;
+ ret = *(tbin->avail - tbin->ncached);
tbin->ncached--;
+
if (unlikely((int)tbin->ncached < tbin->low_water))
tbin->low_water = tbin->ncached;
- ret = tbin->avail[tbin->ncached];
+
return (ret);
}
JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
- bool zero)
+ szind_t binind, bool zero, bool slow_path)
{
void *ret;
- szind_t binind;
- size_t usize;
tcache_bin_t *tbin;
+ bool tcache_success;
+ size_t usize JEMALLOC_CC_SILENCE_INIT(0);
- binind = size2index(size);
assert(binind < NBINS);
tbin = &tcache->tbins[binind];
- usize = index2size(binind);
- ret = tcache_alloc_easy(tbin);
- if (unlikely(ret == NULL)) {
- ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind);
- if (ret == NULL)
+ ret = tcache_alloc_easy(tbin, &tcache_success);
+ assert(tcache_success == (ret != NULL));
+ if (unlikely(!tcache_success)) {
+ bool tcache_hard_success;
+ arena = arena_choose(tsd, arena);
+ if (unlikely(arena == NULL))
+ return (NULL);
+
+ ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind,
+ &tcache_hard_success);
+ if (tcache_hard_success == false)
return (NULL);
}
- assert(tcache_salloc(ret) == usize);
+
+ assert(ret);
+ /*
+ * Only compute usize if required. The checks in the following if
+ * statement are all static.
+ */
+ if (config_prof || (slow_path && config_fill) || unlikely(zero)) {
+ usize = index2size(binind);
+ assert(tcache_salloc(ret) == usize);
+ }
if (likely(!zero)) {
- if (config_fill) {
+ if (slow_path && config_fill) {
if (unlikely(opt_junk_alloc)) {
arena_alloc_junk_small(ret,
&arena_bin_info[binind], false);
@@ -292,7 +324,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
memset(ret, 0, usize);
}
} else {
- if (config_fill && unlikely(opt_junk_alloc)) {
+ if (slow_path && config_fill && unlikely(opt_junk_alloc)) {
arena_alloc_junk_small(ret, &arena_bin_info[binind],
true);
}
@@ -309,28 +341,38 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
- bool zero)
+ szind_t binind, bool zero, bool slow_path)
{
void *ret;
- szind_t binind;
- size_t usize;
tcache_bin_t *tbin;
+ bool tcache_success;
+ size_t usize JEMALLOC_CC_SILENCE_INIT(0);
- binind = size2index(size);
- usize = index2size(binind);
- assert(usize <= tcache_maxclass);
assert(binind < nhbins);
tbin = &tcache->tbins[binind];
- ret = tcache_alloc_easy(tbin);
- if (unlikely(ret == NULL)) {
+ ret = tcache_alloc_easy(tbin, &tcache_success);
+ assert(tcache_success == (ret != NULL));
+ if (unlikely(!tcache_success)) {
/*
* Only allocate one large object at a time, because it's quite
* expensive to create one and not use it.
*/
- ret = arena_malloc_large(arena, usize, zero);
+ arena = arena_choose(tsd, arena);
+ if (unlikely(arena == NULL))
+ return (NULL);
+
+ usize = index2size(binind);
+ assert(usize <= tcache_maxclass);
+ ret = arena_malloc_large(arena, usize, binind, zero);
if (ret == NULL)
return (NULL);
} else {
+ /* Only compute usize on demand */
+ if (config_prof || (slow_path && config_fill) || unlikely(zero)) {
+ usize = index2size(binind);
+ assert(usize <= tcache_maxclass);
+ }
+
if (config_prof && usize == LARGE_MINCLASS) {
arena_chunk_t *chunk =
(arena_chunk_t *)CHUNK_ADDR2BASE(ret);
@@ -340,7 +382,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
BININD_INVALID);
}
if (likely(!zero)) {
- if (config_fill) {
+ if (slow_path && config_fill) {
if (unlikely(opt_junk_alloc))
memset(ret, 0xa5, usize);
else if (unlikely(opt_zero))
@@ -360,14 +402,15 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
}
JEMALLOC_ALWAYS_INLINE void
-tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind)
+tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
+ bool slow_path)
{
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
assert(tcache_salloc(ptr) <= SMALL_MAXCLASS);
- if (config_fill && unlikely(opt_junk_free))
+ if (slow_path && config_fill && unlikely(opt_junk_free))
arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
tbin = &tcache->tbins[binind];
@@ -377,14 +420,15 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind)
(tbin_info->ncached_max >> 1));
}
assert(tbin->ncached < tbin_info->ncached_max);
- tbin->avail[tbin->ncached] = ptr;
tbin->ncached++;
+ *(tbin->avail - tbin->ncached) = ptr;
tcache_event(tsd, tcache);
}
JEMALLOC_ALWAYS_INLINE void
-tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size)
+tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size,
+ bool slow_path)
{
szind_t binind;
tcache_bin_t *tbin;
@@ -396,7 +440,7 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size)
binind = size2index(size);
- if (config_fill && unlikely(opt_junk_free))
+ if (slow_path && config_fill && unlikely(opt_junk_free))
arena_dalloc_junk_large(ptr, size);
tbin = &tcache->tbins[binind];
@@ -406,8 +450,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size)
(tbin_info->ncached_max >> 1), tcache);
}
assert(tbin->ncached < tbin_info->ncached_max);
- tbin->avail[tbin->ncached] = ptr;
tbin->ncached++;
+ *(tbin->avail - tbin->ncached) = ptr;
tcache_event(tsd, tcache);
}
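To make the new bin layout in tcache.h concrete, here is a hedged, self-contained model (not jemalloc's code): avail points one slot past the array, live items occupy avail[-ncached .. -1], and successive pops read increasing addresses, which is what lets adjacent-cacheline prefetch help. Success is reported through a separate flag so callers never have to test the possibly cache-cold pointer itself.

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>

    #define NSLOTS 8

    typedef struct {
        unsigned ncached;
        void **avail;   /* points just past the slot array */
    } tbin_model_t;

    /* Pop the lowest-address item first; the next pop reads the adjacent
     * higher slot, so a refill-then-allocate sequence sweeps upward. */
    static void *
    pop(tbin_model_t *tbin, bool *success)
    {
        void *ret;

        if (tbin->ncached == 0) {
            *success = false;
            return (NULL);
        }
        *success = true;        /* caller tests this flag, not ret */
        ret = *(tbin->avail - tbin->ncached);
        tbin->ncached--;
        return (ret);
    }

    /* Push mirrors pop: newer frees land at lower addresses and are
     * popped first (LIFO). */
    static void
    push(tbin_model_t *tbin, void *ptr)
    {
        assert(tbin->ncached < NSLOTS);
        tbin->ncached++;
        *(tbin->avail - tbin->ncached) = ptr;
    }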