author | Qi Wang <interwq@gwu.edu> | 2015-10-27 22:12:10 (GMT) |
---|---|---|
committer | Jason Evans <je@fb.com> | 2015-11-10 22:28:34 (GMT) |
commit | f4a0f32d340985de477bbe329ecdaecd69ed1055 | |
tree | a148610f4d2253186c59e671dcb065ce3647d2f5 /src/tcache.c | |
parent | 710ca112e31e8621177d08162f60158c27dd2974 | |
Fast-path improvement: reduce # of branches and unnecessary operations.
- Combine multiple runtime branches into a single malloc_slow check (see the sketch after this list).
- Avoid calling arena_choose / size2index / index2size on fast path.
- A few micro optimizations.
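The first item follows a common pattern: fold several rarely-true runtime conditions into one boolean computed at initialization, so the allocation fast path pays for a single well-predicted branch instead of one branch per feature. Below is a minimal standalone sketch of that idea, not jemalloc's actual code; the names `opt_zero`, `opt_junk`, `opt_profile`, `malloc_slow_init`, and `my_malloc` are hypothetical.

```c
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Rarely-true feature flags, normally fixed at startup. */
static bool opt_zero;    /* zero-fill allocations */
static bool opt_junk;    /* junk-fill allocations */
static bool opt_profile; /* heap profiling enabled */

/* Folded result: true iff any slow-path feature is active. */
static bool malloc_slow;

static void
malloc_slow_init(void)
{
	/* Pay for the individual tests once, not per allocation. */
	malloc_slow = opt_zero || opt_junk || opt_profile;
}

static void *
my_malloc(size_t size)
{
	if (!malloc_slow) {
		/* Fast path: one branch covers all rare features. */
		return (malloc(size));
	}
	/* Slow path: only here re-examine the individual flags. */
	if (opt_profile) {
		/* ... record a sample ... */
	}
	return (malloc(size));
}

int
main(void)
{
	malloc_slow_init();
	void *p = my_malloc(64);

	printf("%p\n", p);
	free(p);
	return (0);
}
```

Since the slow path is taken only when at least one feature is active, it can afford to re-test the individual flags there.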
Diffstat (limited to 'src/tcache.c')
-rw-r--r-- | src/tcache.c | 33 |
1 file changed, 19 insertions, 14 deletions
```diff
diff --git a/src/tcache.c b/src/tcache.c
index fdafd0c..78c6230 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -72,7 +72,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache)
 
 void *
 tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
-    tcache_bin_t *tbin, szind_t binind)
+    tcache_bin_t *tbin, szind_t binind, bool *tcache_success)
 {
 	void *ret;
 
@@ -80,7 +80,7 @@ tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
 	    tcache->prof_accumbytes : 0);
 	if (config_prof)
 		tcache->prof_accumbytes = 0;
-	ret = tcache_alloc_easy(tbin);
+	ret = tcache_alloc_easy(tbin, tcache_success);
 
 	return (ret);
 }
@@ -102,7 +102,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
 	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
 		/* Lock the arena bin associated with the first object. */
 		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
-		    tbin->avail[0]);
+		    *(tbin->avail - 1));
 		arena_t *bin_arena = extent_node_arena_get(&chunk->node);
 		arena_bin_t *bin = &bin_arena->bins[binind];
 
@@ -122,7 +122,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
 		}
 		ndeferred = 0;
 		for (i = 0; i < nflush; i++) {
-			ptr = tbin->avail[i];
+			ptr = *(tbin->avail - 1 - i);
 			assert(ptr != NULL);
 			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 			if (extent_node_arena_get(&chunk->node) == bin_arena) {
@@ -139,7 +139,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
 				 * locked.  Stash the object, so that it can be
 				 * handled in a future pass.
 				 */
-				tbin->avail[ndeferred] = ptr;
+				*(tbin->avail - 1 - ndeferred) = ptr;
 				ndeferred++;
 			}
 		}
@@ -158,8 +158,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
 		malloc_mutex_unlock(&bin->lock);
 	}
 
-	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
-	    rem * sizeof(void *));
+	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
+	    sizeof(void *));
 	tbin->ncached = rem;
 	if ((int)tbin->ncached < tbin->low_water)
 		tbin->low_water = tbin->ncached;
@@ -182,7 +182,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
 	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
 		/* Lock the arena associated with the first object. */
 		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
-		    tbin->avail[0]);
+		    *(tbin->avail - 1));
 		arena_t *locked_arena = extent_node_arena_get(&chunk->node);
 		UNUSED bool idump;
 
@@ -206,7 +206,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
 		}
 		ndeferred = 0;
 		for (i = 0; i < nflush; i++) {
-			ptr = tbin->avail[i];
+			ptr = *(tbin->avail - 1 - i);
 			assert(ptr != NULL);
 			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 			if (extent_node_arena_get(&chunk->node) ==
@@ -220,7 +220,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
 				 * Stash the object, so that it can be handled
 				 * in a future pass.
 				 */
-				tbin->avail[ndeferred] = ptr;
+				*(tbin->avail - 1 - ndeferred) = ptr;
 				ndeferred++;
 			}
 		}
@@ -241,8 +241,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
 		malloc_mutex_unlock(&arena->lock);
 	}
 
-	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
-	    rem * sizeof(void *));
+	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
+	    sizeof(void *));
 	tbin->ncached = rem;
 	if ((int)tbin->ncached < tbin->low_water)
 		tbin->low_water = tbin->ncached;
@@ -333,9 +333,14 @@ tcache_create(tsd_t *tsd, arena_t *arena)
 	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
 	for (i = 0; i < nhbins; i++) {
 		tcache->tbins[i].lg_fill_div = 1;
+		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
+		/*
+		 * avail points past the available space.  Allocations will
+		 * access the slots toward higher addresses (for the benefit of
+		 * prefetch).
+		 */
 		tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
 		    (uintptr_t)stack_offset);
-		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
 	}
 
 	return (tcache);
@@ -379,7 +384,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache)
 	    arena_prof_accum(arena, tcache->prof_accumbytes))
 		prof_idump();
 
-	idalloctm(tsd, tcache, false, true);
+	idalloctm(tsd, tcache, false, true, true);
 }
 
 void
```
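The recurring `tbin->avail[i]` → `*(tbin->avail - 1 - i)` rewrites above reflect the new layout described by the comment added in `tcache_create`: `avail` now points just past the bin's slot array, and the `ncached` cached objects occupy the slots immediately below it. The toy program below sketches that discipline, including the `tcache_success` out-parameter style visible in the new `tcache_alloc_small_hard` signature; the names `toy_bin_t`, `toy_push`, `toy_pop`, and `NSLOTS` are hypothetical stand-ins, not jemalloc's actual `tcache_bin_t` API.

```c
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define NSLOTS 8

/* Miniature of a tcache bin after this change. */
typedef struct {
	void **avail;	/* points one past the slot array */
	int ncached;	/* occupied slots: avail[-ncached .. -1] */
} toy_bin_t;

static void *slots[NSLOTS];

/*
 * Deallocation pushes downward: the newest object sits at the
 * lowest occupied address.
 */
static void
toy_push(toy_bin_t *tbin, void *ptr)
{
	assert(tbin->ncached < NSLOTS);
	tbin->ncached++;
	*(tbin->avail - tbin->ncached) = ptr;
}

/*
 * Allocation pops the newest object.  Success is reported via an
 * out-parameter so the caller can branch on a boolean instead of
 * comparing the returned pointer against NULL.
 */
static void *
toy_pop(toy_bin_t *tbin, bool *success)
{
	void *ret;

	if (tbin->ncached == 0) {
		*success = false;
		return (NULL);
	}
	*success = true;
	ret = *(tbin->avail - tbin->ncached);
	tbin->ncached--;
	return (ret);
}

int
main(void)
{
	toy_bin_t tbin = { slots + NSLOTS, 0 };
	int a, b;
	bool ok;

	toy_push(&tbin, &a);
	toy_push(&tbin, &b);
	printf("%d\n", toy_pop(&tbin, &ok) == &b);	/* LIFO: prints 1 */
	printf("%d\n", toy_pop(&tbin, &ok) == &a);	/* prints 1 */
	toy_pop(&tbin, &ok);
	printf("%d\n", ok);				/* empty: prints 0 */
	return (0);
}
```

Because a pop reads `*(avail - ncached)` and then decrements `ncached`, consecutive pops touch ascending addresses, which is the prefetch benefit the in-diff comment refers to; the flush loops instead walk downward from `*(avail - 1)`, the oldest end of the stack, and the `memmove` keeps the remaining `rem` newest objects packed just below `avail`.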