summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
authorJason Evans <jasone@canonware.com>2010-03-07 23:34:14 (GMT)
committerJason Evans <jasone@canonware.com>2010-03-14 04:38:18 (GMT)
commit3fa9a2fad83a3014d5069b5a2530a0cfb8d8d197 (patch)
tree394d08733ba2c06209e95c13ad11adc993969287
parent2caa4715ed4f787f263239ff97dd824636289286 (diff)
downloadjemalloc-3fa9a2fad83a3014d5069b5a2530a0cfb8d8d197.zip
jemalloc-3fa9a2fad83a3014d5069b5a2530a0cfb8d8d197.tar.gz
jemalloc-3fa9a2fad83a3014d5069b5a2530a0cfb8d8d197.tar.bz2
Simplify tcache object caching.
Use chains of cached objects, rather than using arrays of pointers. Since tcache_bin_t is no longer dynamically sized, convert tcache_t's tbin to an array of structures, rather than an array of pointers. This implicitly removes tcache_bin_{create,destroy}(), which further simplifies the fast path for malloc/free. Use cacheline alignment for tcache_t allocations. Remove runtime configuration option for number of tcache bin slots, and replace it with a boolean option for enabling/disabling tcache. Limit the number of tcache objects to the lesser of TCACHE_NSLOTS_MAX and 2X the number of regions per run for the size class. For GC-triggered flush, discard 3/4 of the objects below the low water mark, rather than 1/2.
-rw-r--r--jemalloc/doc/jemalloc.3.in11
-rw-r--r--jemalloc/include/jemalloc/internal/arena.h6
-rw-r--r--jemalloc/include/jemalloc/internal/tcache.h109
-rw-r--r--jemalloc/src/arena.c60
-rw-r--r--jemalloc/src/ctl.c6
-rw-r--r--jemalloc/src/jemalloc.c9
-rw-r--r--jemalloc/src/stats.c21
-rw-r--r--jemalloc/src/tcache.c197
8 files changed, 172 insertions, 247 deletions
diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in
index 272a5b9..197e68c 100644
--- a/jemalloc/doc/jemalloc.3.in
+++ b/jemalloc/doc/jemalloc.3.in
@@ -376,8 +376,7 @@ will disable dirty page purging.
@roff_tcache@.Ev JEMALLOC_OPTIONS=14g
@roff_tcache@will disable garbage collection.
@roff_tcache@.It H
-@roff_tcache@Double/halve the number of thread-specific cache slots per size
-@roff_tcache@class.
+@roff_tcache@Enable/disable thread-specific caching.
@roff_tcache@When there are multiple threads, each thread uses a
@roff_tcache@thread-specific cache for small and medium objects.
@roff_tcache@Thread-specific caching allows many allocations to be satisfied
@@ -386,11 +385,7 @@ will disable dirty page purging.
@roff_tcache@See the
@roff_tcache@.Dq G
@roff_tcache@option for related tuning information.
-@roff_tcache@The default number of cache slots is 128;
-@roff_tcache@.Ev JEMALLOC_OPTIONS=7h
-@roff_tcache@will disable thread-specific caching.
-@roff_tcache@Note that one cache slot per size class is not a valid
-@roff_tcache@configuration due to implementation details.
+@roff_tcache@This option is enabled by default.
@roff_prof@.It I
@roff_prof@Double/halve the average interval between memory profile dumps, as
@roff_prof@measured in bytes of allocation activity.
@@ -773,7 +768,7 @@ option.
@roff_xmalloc@option.
@roff_xmalloc@.Ed
.\"-----------------------------------------------------------------------------
-@roff_tcache@.It Sy "opt.lg_tcache_nslots (size_t) r-"
+@roff_tcache@.It Sy "opt.tcache (bool) r-"
@roff_tcache@.Bd -ragged -offset indent -compact
@roff_tcache@See the
@roff_tcache@.Dq H
diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h
index c4803a3..e950d59 100644
--- a/jemalloc/include/jemalloc/internal/arena.h
+++ b/jemalloc/include/jemalloc/internal/arena.h
@@ -18,7 +18,11 @@
#ifdef JEMALLOC_TINY
/* Smallest size class to support. */
-# define LG_TINY_MIN 1
+# ifdef JEMALLOC_TCACHE
+# define LG_TINY_MIN LG_SIZEOF_PTR
+# else
+# define LG_TINY_MIN 1
+# endif
#endif
/*
diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/jemalloc/include/jemalloc/internal/tcache.h
index b499f52..fa238bf 100644
--- a/jemalloc/include/jemalloc/internal/tcache.h
+++ b/jemalloc/include/jemalloc/internal/tcache.h
@@ -6,10 +6,13 @@ typedef struct tcache_bin_s tcache_bin_t;
typedef struct tcache_s tcache_t;
/*
- * Default number of cache slots for each bin in the thread cache (0:
- * disabled).
+ * Absolute maximum number of cache slots for each bin in the thread cache.
+ * This is an additional constraint beyond that imposed as: twice the number of
+ * regions per run for this size class.
+ *
+ * This constant must be an even number.
*/
-#define LG_TCACHE_NSLOTS_DEFAULT 7
+#define TCACHE_NSLOTS_MAX 200
/*
* (1U << opt_lg_tcache_gc_sweep) is the approximate number of allocation
* events between full GC sweeps (-1: disabled). Integer rounding may cause
@@ -29,7 +32,8 @@ struct tcache_bin_s {
unsigned low_water; /* Min # cached since last GC. */
unsigned high_water; /* Max # cached since last GC. */
unsigned ncached; /* # of cached objects. */
- void *slots[1]; /* Dynamically sized. */
+ unsigned ncached_max; /* Upper limit on ncached. */
+ void *avail; /* Chain of available objects. */
};
struct tcache_s {
@@ -42,26 +46,20 @@ struct tcache_s {
arena_t *arena; /* This thread's arena. */
unsigned ev_cnt; /* Event count since incremental GC. */
unsigned next_gc_bin; /* Next bin to GC. */
- tcache_bin_t *tbins[1]; /* Dynamically sized. */
+ tcache_bin_t tbins[1]; /* Dynamically sized. */
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-extern size_t opt_lg_tcache_nslots;
+extern bool opt_tcache;
extern ssize_t opt_lg_tcache_gc_sweep;
/* Map of thread-specific caches. */
extern __thread tcache_t *tcache_tls
JEMALLOC_ATTR(tls_model("initial-exec"));
-/*
- * Number of cache slots for each bin in the thread cache, or 0 if tcache is
- * disabled.
- */
-extern size_t tcache_nslots;
-
/* Number of tcache allocation/deallocation events between incremental GCs. */
extern unsigned tcache_gc_incr;
@@ -71,10 +69,7 @@ void tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
#endif
);
tcache_t *tcache_create(arena_t *arena);
-void tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin,
- unsigned binind);
void *tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind);
-tcache_bin_t *tcache_bin_create(arena_t *arena);
void tcache_destroy(tcache_t *tcache);
#ifdef JEMALLOC_STATS
void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
@@ -99,7 +94,7 @@ tcache_get(void)
{
tcache_t *tcache;
- if (isthreaded == false || tcache_nslots == 0)
+ if ((isthreaded & opt_tcache) == false)
return (NULL);
tcache = tcache_tls;
@@ -124,37 +119,24 @@ tcache_event(tcache_t *tcache)
tcache->ev_cnt++;
assert(tcache->ev_cnt <= tcache_gc_incr);
- if (tcache->ev_cnt >= tcache_gc_incr) {
+ if (tcache->ev_cnt == tcache_gc_incr) {
size_t binind = tcache->next_gc_bin;
- tcache_bin_t *tbin = tcache->tbins[binind];
-
- if (tbin != NULL) {
- if (tbin->high_water == 0) {
- /*
- * This bin went completely unused for an
- * entire GC cycle, so throw away the tbin.
- */
- assert(tbin->ncached == 0);
- tcache_bin_destroy(tcache, tbin, binind);
- tcache->tbins[binind] = NULL;
- } else {
- if (tbin->low_water > 0) {
- /*
- * Flush (ceiling) half of the objects
- * below the low water mark.
- */
- tcache_bin_flush(tbin, binind,
- tbin->ncached - (tbin->low_water >>
- 1) - (tbin->low_water & 1)
+ tcache_bin_t *tbin = &tcache->tbins[binind];
+
+ if (tbin->low_water > 0) {
+ /*
+ * Flush (ceiling) 3/4 of the objects below the low
+ * water mark.
+ */
+ tcache_bin_flush(tbin, binind, tbin->ncached -
+ tbin->low_water + (tbin->low_water >> 2)
#ifdef JEMALLOC_PROF
- , tcache
+ , tcache
#endif
- );
- }
- tbin->low_water = tbin->ncached;
- tbin->high_water = tbin->ncached;
- }
+ );
}
+ tbin->low_water = tbin->ncached;
+ tbin->high_water = tbin->ncached;
tcache->next_gc_bin++;
if (tcache->next_gc_bin == nbins)
@@ -166,21 +148,24 @@ tcache_event(tcache_t *tcache)
JEMALLOC_INLINE void *
tcache_bin_alloc(tcache_bin_t *tbin)
{
+ void *ret;
if (tbin->ncached == 0)
return (NULL);
tbin->ncached--;
if (tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
- return (tbin->slots[tbin->ncached]);
+ ret = tbin->avail;
+ tbin->avail = *(void **)ret;
+ return (ret);
}
JEMALLOC_INLINE void *
tcache_alloc(tcache_t *tcache, size_t size, bool zero)
{
void *ret;
- tcache_bin_t *tbin;
size_t binind;
+ tcache_bin_t *tbin;
if (size <= small_maxclass)
binind = small_size2bin[size];
@@ -189,14 +174,7 @@ tcache_alloc(tcache_t *tcache, size_t size, bool zero)
lg_mspace);
}
assert(binind < nbins);
- tbin = tcache->tbins[binind];
- if (tbin == NULL) {
- tbin = tcache_bin_create(tcache->arena);
- if (tbin == NULL)
- return (NULL);
- tcache->tbins[binind] = tbin;
- }
-
+ tbin = &tcache->tbins[binind];
ret = tcache_bin_alloc(tbin);
if (ret == NULL) {
ret = tcache_alloc_hard(tcache, tbin, binind);
@@ -250,29 +228,20 @@ tcache_dalloc(tcache_t *tcache, void *ptr)
#ifdef JEMALLOC_FILL
if (opt_junk)
- memset(ptr, 0x5a, arena->bins[binind].reg_size);
+ memset(ptr, 0x5a, bin->reg_size);
#endif
- tbin = tcache->tbins[binind];
- if (tbin == NULL) {
- tbin = tcache_bin_create(choose_arena());
- if (tbin == NULL) {
- malloc_mutex_lock(&arena->lock);
- arena_dalloc_bin(arena, chunk, ptr, mapelm);
- malloc_mutex_unlock(&arena->lock);
- return;
- }
- tcache->tbins[binind] = tbin;
- }
-
- if (tbin->ncached == tcache_nslots)
- tcache_bin_flush(tbin, binind, (tcache_nslots >> 1)
+ tbin = &tcache->tbins[binind];
+ if (tbin->ncached == tbin->ncached_max) {
+ tcache_bin_flush(tbin, binind, (tbin->ncached_max >> 1)
#ifdef JEMALLOC_PROF
, tcache
#endif
);
- assert(tbin->ncached < tcache_nslots);
- tbin->slots[tbin->ncached] = ptr;
+ }
+ assert(tbin->ncached < tbin->ncached_max);
+ *(void **)ptr = tbin->avail;
+ tbin->avail = ptr;
tbin->ncached++;
if (tbin->ncached > tbin->high_water)
tbin->high_water = tbin->ncached;
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index 248fcdd..1153a0b 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -53,13 +53,26 @@ static malloc_mutex_t purge_lock;
static const uint8_t const_small_size2bin[STATIC_PAGE_SIZE - 255] = {
S2B_1(0xffU) /* 0 */
#if (LG_QUANTUM == 4)
-/* 64-bit system ************************/
+/* 16-byte quantum **********************/
# ifdef JEMALLOC_TINY
- S2B_2(0) /* 2 */
- S2B_2(1) /* 4 */
- S2B_4(2) /* 8 */
- S2B_8(3) /* 16 */
+# if (LG_TINY_MIN == 1)
+ S2B_2(0) /* 2 */
+ S2B_2(1) /* 4 */
+ S2B_4(2) /* 8 */
+ S2B_8(3) /* 16 */
# define S2B_QMIN 3
+# elif (LG_TINY_MIN == 2)
+ S2B_4(0) /* 4 */
+ S2B_4(1) /* 8 */
+ S2B_8(2) /* 16 */
+# define S2B_QMIN 2
+# elif (LG_TINY_MIN == 3)
+ S2B_8(0) /* 8 */
+ S2B_8(1) /* 16 */
+# define S2B_QMIN 1
+# else
+# error "Unsupported LG_TINY_MIN"
+# endif
# else
S2B_16(0) /* 16 */
# define S2B_QMIN 0
@@ -73,12 +86,20 @@ static const uint8_t const_small_size2bin[STATIC_PAGE_SIZE - 255] = {
S2B_16(S2B_QMIN + 7) /* 128 */
# define S2B_CMIN (S2B_QMIN + 8)
#else
-/* 32-bit system ************************/
+/* 8-byte quantum ***********************/
# ifdef JEMALLOC_TINY
- S2B_2(0) /* 2 */
- S2B_2(1) /* 4 */
- S2B_4(2) /* 8 */
+# if (LG_TINY_MIN == 1)
+ S2B_2(0) /* 2 */
+ S2B_2(1) /* 4 */
+ S2B_4(2) /* 8 */
# define S2B_QMIN 2
+# elif (LG_TINY_MIN == 2)
+ S2B_4(0) /* 4 */
+ S2B_4(1) /* 8 */
+# define S2B_QMIN 1
+# else
+# error "Unsupported LG_TINY_MIN"
+# endif
# else
S2B_8(0) /* 8 */
# define S2B_QMIN 0
@@ -1048,28 +1069,15 @@ arena_tcache_fill(arena_t *arena, tcache_bin_t *tbin, size_t binind
#ifdef JEMALLOC_PROF
arena_prof_accum(arena, prof_accumbytes);
#endif
- for (i = 0, nfill = (tcache_nslots >> 1); i < nfill; i++) {
+ for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) {
if ((run = bin->runcur) != NULL && run->nfree > 0)
ptr = arena_bin_malloc_easy(arena, bin, run);
else
ptr = arena_bin_malloc_hard(arena, bin);
- if (ptr == NULL) {
- if (i > 0) {
- /*
- * Move valid pointers to the base of
- * tbin->slots.
- */
- memmove(&tbin->slots[0],
- &tbin->slots[nfill - i],
- i * sizeof(void *));
- }
+ if (ptr == NULL)
break;
- }
- /*
- * Fill slots such that the objects lowest in memory come last.
- * This causes tcache to use low objects first.
- */
- tbin->slots[nfill - 1 - i] = ptr;
+ *(void **)ptr = tbin->avail;
+ tbin->avail = ptr;
}
#ifdef JEMALLOC_STATS
bin->stats.nfills++;
diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c
index 33ddbb5..28e9368 100644
--- a/jemalloc/src/ctl.c
+++ b/jemalloc/src/ctl.c
@@ -64,7 +64,7 @@ CTL_PROTO(opt_xmalloc)
CTL_PROTO(opt_zero)
#endif
#ifdef JEMALLOC_TCACHE
-CTL_PROTO(opt_lg_tcache_nslots)
+CTL_PROTO(opt_tcache)
CTL_PROTO(opt_lg_tcache_gc_sweep)
#endif
#ifdef JEMALLOC_PROF
@@ -230,7 +230,7 @@ static const ctl_node_t opt_node[] = {
{NAME("zero"), CTL(opt_zero)},
#endif
#ifdef JEMALLOC_TCACHE
- {NAME("lg_tcache_nslots"), CTL(opt_lg_tcache_nslots)},
+ {NAME("tcache"), CTL(opt_tcache)},
{NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)},
#endif
#ifdef JEMALLOC_PROF
@@ -1070,7 +1070,7 @@ CTL_RO_GEN(opt_xmalloc, opt_xmalloc, bool)
CTL_RO_GEN(opt_zero, opt_zero, bool)
#endif
#ifdef JEMALLOC_TCACHE
-CTL_RO_GEN(opt_lg_tcache_nslots, opt_lg_tcache_nslots, size_t)
+CTL_RO_GEN(opt_tcache, opt_tcache, bool)
CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
#endif
#ifdef JEMALLOC_PROF
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index dc7879f..49c2b0b 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -482,13 +482,10 @@ MALLOC_OUT:
opt_lg_tcache_gc_sweep++;
break;
case 'h':
- if (opt_lg_tcache_nslots > 0)
- opt_lg_tcache_nslots--;
+ opt_tcache = false;
break;
case 'H':
- if (opt_lg_tcache_nslots + 1 <
- (sizeof(size_t) << 3))
- opt_lg_tcache_nslots++;
+ opt_tcache = true;
break;
#endif
#ifdef JEMALLOC_PROF
@@ -729,7 +726,7 @@ MALLOC_OUT:
* default.
*/
#ifdef JEMALLOC_TCACHE
- if (tcache_nslots
+ if (opt_tcache
# ifdef JEMALLOC_PROF
/*
* Profile data storage concurrency is directly linked to
diff --git a/jemalloc/src/stats.c b/jemalloc/src/stats.c
index 311a5f2..236d7f8 100644
--- a/jemalloc/src/stats.c
+++ b/jemalloc/src/stats.c
@@ -440,6 +440,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0))
== 0)
write_cb(cbopaque, bv ? "F" : "f");
+ if ((err = JEMALLOC_P(mallctl)("opt.tcache", &bv, &bsz, NULL,
+ 0)) == 0)
+ write_cb(cbopaque, bv ? "H" : "h");
if ((err = JEMALLOC_P(mallctl)("opt.junk", &bv, &bsz, NULL, 0))
== 0)
write_cb(cbopaque, bv ? "J" : "j");
@@ -550,21 +553,13 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
write_cb(cbopaque,
"Min active:dirty page ratio per arena: N/A\n");
}
- if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_nslots", &sv,
+ if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv,
&ssz, NULL, 0)) == 0) {
- size_t tcache_nslots, tcache_gc_sweep;
-
- tcache_nslots = (1U << sv);
- write_cb(cbopaque,
- "Thread cache slots per size class: ");
- write_cb(cbopaque, tcache_nslots ?
- umax2s(tcache_nslots, 10, s) : "N/A");
- write_cb(cbopaque, "\n");
-
- CTL_GET("opt.lg_tcache_gc_sweep", &ssv, ssize_t);
- tcache_gc_sweep = (1U << ssv);
+ size_t tcache_gc_sweep = (1U << ssv);
+ bool tcache_enabled;
+ CTL_GET("opt.tcache", &tcache_enabled, bool);
write_cb(cbopaque, "Thread cache GC sweep interval: ");
- write_cb(cbopaque, tcache_nslots && ssv >= 0 ?
+ write_cb(cbopaque, tcache_enabled && ssv >= 0 ?
umax2s(tcache_gc_sweep, 10, s) : "N/A");
write_cb(cbopaque, "\n");
}
diff --git a/jemalloc/src/tcache.c b/jemalloc/src/tcache.c
index e1b1031..dcb72c6 100644
--- a/jemalloc/src/tcache.c
+++ b/jemalloc/src/tcache.c
@@ -4,7 +4,7 @@
/******************************************************************************/
/* Data. */
-size_t opt_lg_tcache_nslots = LG_TCACHE_NSLOTS_DEFAULT;
+bool opt_tcache = true;
ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
/* Map of thread-specific caches. */
@@ -16,7 +16,6 @@ __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
*/
static pthread_key_t tcache_tsd;
-size_t tcache_nslots;
unsigned tcache_gc_incr;
/******************************************************************************/
@@ -51,16 +50,14 @@ tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
#endif
)
{
- arena_chunk_t *chunk;
- arena_t *arena;
- void *ptr;
- unsigned i, ndeferred, ncached;
+ void *flush, *deferred, *ptr;
+ unsigned i, nflush, ndeferred;
- for (ndeferred = tbin->ncached - rem; ndeferred > 0;) {
- ncached = ndeferred;
+ for (flush = tbin->avail, nflush = tbin->ncached - rem; flush != NULL;
+ flush = deferred, nflush = ndeferred) {
/* Lock the arena associated with the first object. */
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(tbin->slots[0]);
- arena = chunk->arena;
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
+ arena_t *arena = chunk->arena;
malloc_mutex_lock(&arena->lock);
#ifdef JEMALLOC_PROF
if (arena == tcache->arena) {
@@ -68,9 +65,12 @@ tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
tcache->prof_accumbytes = 0;
}
#endif
- /* Deallocate every object that belongs to the locked arena. */
- for (i = ndeferred = 0; i < ncached; i++) {
- ptr = tbin->slots[i];
+ deferred = NULL;
+ ndeferred = 0;
+ for (i = 0; i < nflush; i++) {
+ ptr = flush;
+ assert(ptr != NULL);
+ flush = *(void **)ptr;
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk->arena == arena) {
size_t pageind = (((uintptr_t)ptr -
@@ -85,7 +85,8 @@ tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
* Stash the object, so that it can be handled
* in a future pass.
*/
- tbin->slots[ndeferred] = ptr;
+ *(void **)ptr = deferred;
+ deferred = ptr;
ndeferred++;
}
}
@@ -105,98 +106,41 @@ tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
}
#endif
malloc_mutex_unlock(&arena->lock);
- }
-
- if (rem > 0) {
- /*
- * Shift the remaining valid pointers to the base of the slots
- * array.
- */
- memmove(&tbin->slots[0], &tbin->slots[tbin->ncached - rem],
- rem * sizeof(void *));
- }
- tbin->ncached = rem;
-}
-
-tcache_bin_t *
-tcache_bin_create(arena_t *arena)
-{
- tcache_bin_t *ret;
- size_t tsize;
-
- tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
- if (tsize <= small_maxclass)
- ret = (tcache_bin_t *)arena_malloc_small(arena, tsize, false);
- else if (tsize <= bin_maxclass)
- ret = (tcache_bin_t *)arena_malloc_medium(arena, tsize, false);
- else
- ret = (tcache_bin_t *)imalloc(tsize);
- if (ret == NULL)
- return (NULL);
-#ifdef JEMALLOC_STATS
- memset(&ret->tstats, 0, sizeof(tcache_bin_stats_t));
-#endif
- ret->low_water = 0;
- ret->high_water = 0;
- ret->ncached = 0;
-
- return (ret);
-}
-
-void
-tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin, unsigned binind)
-{
- arena_t *arena;
- arena_chunk_t *chunk;
- size_t pageind, tsize;
- arena_chunk_map_t *mapelm;
- chunk = CHUNK_ADDR2BASE(tbin);
- arena = chunk->arena;
- pageind = (((uintptr_t)tbin - (uintptr_t)chunk) >> PAGE_SHIFT);
- mapelm = &chunk->map[pageind];
-
-#ifdef JEMALLOC_STATS
- if (tbin->tstats.nrequests != 0) {
- arena_t *arena = tcache->arena;
- arena_bin_t *bin = &arena->bins[binind];
- malloc_mutex_lock(&arena->lock);
- bin->stats.nrequests += tbin->tstats.nrequests;
- if (bin->reg_size <= small_maxclass)
- arena->stats.nmalloc_small += tbin->tstats.nrequests;
- else
- arena->stats.nmalloc_medium += tbin->tstats.nrequests;
- malloc_mutex_unlock(&arena->lock);
+ if (flush != NULL) {
+ /*
+ * This was the first pass, and rem cached objects
+ * remain.
+ */
+ tbin->avail = flush;
+ }
}
-#endif
- assert(tbin->ncached == 0);
- tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
- if (tsize <= bin_maxclass) {
- malloc_mutex_lock(&arena->lock);
- arena_dalloc_bin(arena, chunk, tbin, mapelm);
- malloc_mutex_unlock(&arena->lock);
- } else
- idalloc(tbin);
+ tbin->ncached = rem;
}
tcache_t *
tcache_create(arena_t *arena)
{
tcache_t *tcache;
+ size_t size;
+ unsigned i;
- if (sizeof(tcache_t) + (sizeof(tcache_bin_t *) * (nbins - 1)) <=
- small_maxclass) {
- tcache = (tcache_t *)arena_malloc_small(arena, sizeof(tcache_t)
- + (sizeof(tcache_bin_t *) * (nbins - 1)), true);
- } else if (sizeof(tcache_t) + (sizeof(tcache_bin_t *) * (nbins - 1)) <=
- bin_maxclass) {
- tcache = (tcache_t *)arena_malloc_medium(arena, sizeof(tcache_t)
- + (sizeof(tcache_bin_t *) * (nbins - 1)), true);
- } else {
- tcache = (tcache_t *)icalloc(sizeof(tcache_t) +
- (sizeof(tcache_bin_t *) * (nbins - 1)));
- }
+ size = sizeof(tcache_t) + (sizeof(tcache_bin_t) * (nbins - 1));
+ /*
+ * Round up to the nearest multiple of the cacheline size, in order to
+ * avoid the possibility of false cacheline sharing.
+ *
+ * That this works relies on the same logic as in ipalloc().
+ */
+ size = (size + CACHELINE_MASK) & (-CACHELINE);
+
+ if (size <= small_maxclass)
+ tcache = (tcache_t *)arena_malloc_small(arena, size, true);
+ else if (size <= bin_maxclass)
+ tcache = (tcache_t *)arena_malloc_medium(arena, size, true);
+ else
+ tcache = (tcache_t *)icalloc(size);
if (tcache == NULL)
return (NULL);
@@ -210,6 +154,14 @@ tcache_create(arena_t *arena)
#endif
tcache->arena = arena;
+ assert((TCACHE_NSLOTS_MAX & 1U) == 0);
+ for (i = 0; i < nbins; i++) {
+ if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_MAX) {
+ tcache->tbins[i].ncached_max = (arena->bins[i].nregs <<
+ 1);
+ } else
+ tcache->tbins[i].ncached_max = TCACHE_NSLOTS_MAX;
+ }
tcache_tls = tcache;
pthread_setspecific(tcache_tsd, tcache);
@@ -231,15 +183,29 @@ tcache_destroy(tcache_t *tcache)
#endif
for (i = 0; i < nbins; i++) {
- tcache_bin_t *tbin = tcache->tbins[i];
- if (tbin != NULL) {
- tcache_bin_flush(tbin, i, 0
+ tcache_bin_t *tbin = &tcache->tbins[i];
+ tcache_bin_flush(tbin, i, 0
#ifdef JEMALLOC_PROF
- , tcache
+ , tcache
#endif
- );
- tcache_bin_destroy(tcache, tbin, i);
+ );
+
+#ifdef JEMALLOC_STATS
+ if (tbin->tstats.nrequests != 0) {
+ arena_t *arena = tcache->arena;
+ arena_bin_t *bin = &arena->bins[i];
+ malloc_mutex_lock(&arena->lock);
+ bin->stats.nrequests += tbin->tstats.nrequests;
+ if (bin->reg_size <= small_maxclass) {
+ arena->stats.nmalloc_small +=
+ tbin->tstats.nrequests;
+ } else {
+ arena->stats.nmalloc_medium +=
+ tbin->tstats.nrequests;
+ }
+ malloc_mutex_unlock(&arena->lock);
}
+#endif
}
#ifdef JEMALLOC_PROF
@@ -286,21 +252,17 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena)
/* Merge and reset tcache stats. */
for (i = 0; i < mbin0; i++) {
arena_bin_t *bin = &arena->bins[i];
- tcache_bin_t *tbin = tcache->tbins[i];
- if (tbin != NULL) {
- bin->stats.nrequests += tbin->tstats.nrequests;
- arena->stats.nmalloc_small += tbin->tstats.nrequests;
- tbin->tstats.nrequests = 0;
- }
+ tcache_bin_t *tbin = &tcache->tbins[i];
+ bin->stats.nrequests += tbin->tstats.nrequests;
+ arena->stats.nmalloc_small += tbin->tstats.nrequests;
+ tbin->tstats.nrequests = 0;
}
for (; i < nbins; i++) {
arena_bin_t *bin = &arena->bins[i];
- tcache_bin_t *tbin = tcache->tbins[i];
- if (tbin != NULL) {
- bin->stats.nrequests += tbin->tstats.nrequests;
- arena->stats.nmalloc_medium += tbin->tstats.nrequests;
- tbin->tstats.nrequests = 0;
- }
+ tcache_bin_t *tbin = &tcache->tbins[i];
+ bin->stats.nrequests += tbin->tstats.nrequests;
+ arena->stats.nmalloc_medium += tbin->tstats.nrequests;
+ tbin->tstats.nrequests = 0;
}
}
#endif
@@ -309,9 +271,7 @@ void
tcache_boot(void)
{
- if (opt_lg_tcache_nslots > 0) {
- tcache_nslots = (1U << opt_lg_tcache_nslots);
-
+ if (opt_tcache) {
/* Compute incremental GC event threshold. */
if (opt_lg_tcache_gc_sweep >= 0) {
tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
@@ -319,10 +279,7 @@ tcache_boot(void)
0) ? 0 : 1);
} else
tcache_gc_incr = 0;
- } else
- tcache_nslots = 0;
- if (tcache_nslots != 0) {
if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
0) {
malloc_write(