From 597632be188d2bcc135dad2145cc46ef44897aad Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Fri, 18 Mar 2011 13:41:33 -0700
Subject: Improve thread-->arena assignment.

Rather than blindly assigning threads to arenas in round-robin fashion,
choose the lowest-numbered arena that currently has the smallest number
of threads assigned to it.

Add the "stats.arenas.<i>.nthreads" mallctl.
---
 jemalloc/doc/jemalloc.xml.in                       | 10 +++
 jemalloc/include/jemalloc/internal/arena.h         | 14 ++++-
 jemalloc/include/jemalloc/internal/ctl.h           |  1 +
 .../jemalloc/internal/jemalloc_internal.h.in       |  3 +-
 jemalloc/src/arena.c                               |  1 +
 jemalloc/src/ctl.c                                 | 13 ++++
 jemalloc/src/jemalloc.c                            | 71 ++++++++++++++++++----
 jemalloc/src/stats.c                               |  4 ++
 8 files changed, 101 insertions(+), 16 deletions(-)

diff --git a/jemalloc/doc/jemalloc.xml.in b/jemalloc/doc/jemalloc.xml.in
index 97893c1..2bde890 100644
--- a/jemalloc/doc/jemalloc.xml.in
+++ b/jemalloc/doc/jemalloc.xml.in
@@ -1644,6 +1644,16 @@ malloc_conf = "xmalloc:true";]]>
 
       <varlistentry>
         <term>
+          <mallctl>stats.arenas.&lt;i&gt;.nthreads</mallctl>
+          (<type>unsigned</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Number of threads currently assigned to
+        arena.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
           <mallctl>stats.arenas.&lt;i&gt;.pactive</mallctl>
           (<type>size_t</type>)
           <literal>r-</literal>
diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h
index 1744b45..94b7f3d 100644
--- a/jemalloc/include/jemalloc/internal/arena.h
+++ b/jemalloc/include/jemalloc/internal/arena.h
@@ -295,8 +295,18 @@ struct arena_s {
 	unsigned		ind;
 
 	/*
-	 * All non-bin-related operations on this arena require that lock be
-	 * locked.
+	 * Number of threads currently assigned to this arena.  This field is
+	 * protected by arenas_lock.
+	 */
+	unsigned		nthreads;
+
+	/*
+	 * There are three classes of arena operations from a locking
+	 * perspective:
+	 * 1) Thread assignment (modifies nthreads) is protected by
+	 *    arenas_lock.
+	 * 2) Bin-related operations are protected by bin locks.
+	 * 3) Chunk- and run-related operations are protected by this mutex.
 	 */
 	malloc_mutex_t		lock;
diff --git a/jemalloc/include/jemalloc/internal/ctl.h b/jemalloc/include/jemalloc/internal/ctl.h
index 8776ad1..f1f5eb7 100644
--- a/jemalloc/include/jemalloc/internal/ctl.h
+++ b/jemalloc/include/jemalloc/internal/ctl.h
@@ -29,6 +29,7 @@ struct ctl_node_s {
 
 struct ctl_arena_stats_s {
 	bool			initialized;
+	unsigned		nthreads;
 	size_t			pactive;
 	size_t			pdirty;
 #ifdef JEMALLOC_STATS
diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
index a80fc7c..a7472c0 100644
--- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
@@ -293,6 +293,7 @@ extern size_t	lg_pagesize;
 extern unsigned	ncpus;
 
 extern malloc_mutex_t	arenas_lock; /* Protects arenas initialization. */
+extern pthread_key_t	arenas_tsd;
 #ifndef NO_TLS
 /*
  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
@@ -302,9 +303,9 @@ extern __thread arena_t	*arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
 #  define ARENA_GET()	arenas_tls
 #  define ARENA_SET(v)	do {						\
 	arenas_tls = (v);						\
+	pthread_setspecific(arenas_tsd, (void *)(v));			\
 } while (0)
 #else
-extern pthread_key_t	arenas_tsd;
 #  define ARENA_GET()	((arena_t *)pthread_getspecific(arenas_tsd))
 #  define ARENA_SET(v)	do {						\
 	pthread_setspecific(arenas_tsd, (void *)(v));			\
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index a1fa2a3..022f9ec 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -2175,6 +2175,7 @@ arena_new(arena_t *arena, unsigned ind)
 	arena_bin_t *bin;
 
 	arena->ind = ind;
+	arena->nthreads = 0;
 
 	if (malloc_mutex_init(&arena->lock))
 		return (true);
diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c
index c32e955..b4f280d 100644
--- a/jemalloc/src/ctl.c
+++ b/jemalloc/src/ctl.c
@@ -182,6 +182,7 @@ CTL_PROTO(stats_arenas_i_lruns_j_highruns)
 CTL_PROTO(stats_arenas_i_lruns_j_curruns)
 INDEX_PROTO(stats_arenas_i_lruns_j)
 #endif
+CTL_PROTO(stats_arenas_i_nthreads)
 CTL_PROTO(stats_arenas_i_pactive)
 CTL_PROTO(stats_arenas_i_pdirty)
 #ifdef JEMALLOC_STATS
@@ -434,6 +435,7 @@ static const ctl_node_t stats_arenas_i_lruns_node[] = {
 #endif
 
 static const ctl_node_t stats_arenas_i_node[] = {
+	{NAME("nthreads"),	CTL(stats_arenas_i_nthreads)},
 	{NAME("pactive"),	CTL(stats_arenas_i_pactive)},
 	{NAME("pdirty"),	CTL(stats_arenas_i_pdirty)}
 #ifdef JEMALLOC_STATS
@@ -620,6 +622,7 @@ ctl_arena_refresh(arena_t *arena, unsigned i)
 
 	ctl_arena_clear(astats);
 
+	sstats->nthreads += astats->nthreads;
 #ifdef JEMALLOC_STATS
 	ctl_arena_stats_amerge(astats, arena);
 	/* Merge into sum stats as well. */
@@ -657,10 +660,17 @@ ctl_refresh(void)
 	 * Clear sum stats, since they will be merged into by
 	 * ctl_arena_refresh().
 	 */
+	ctl_stats.arenas[narenas].nthreads = 0;
 	ctl_arena_clear(&ctl_stats.arenas[narenas]);
 
 	malloc_mutex_lock(&arenas_lock);
 	memcpy(tarenas, arenas, sizeof(arena_t *) * narenas);
+	for (i = 0; i < narenas; i++) {
+		if (arenas[i] != NULL)
+			ctl_stats.arenas[i].nthreads = arenas[i]->nthreads;
+		else
+			ctl_stats.arenas[i].nthreads = 0;
+	}
 	malloc_mutex_unlock(&arenas_lock);
 	for (i = 0; i < narenas; i++) {
 		bool initialized = (tarenas[i] != NULL);
@@ -1129,6 +1139,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 		malloc_mutex_lock(&arenas_lock);
 		if ((arena = arenas[newind]) == NULL)
 			arena = arenas_extend(newind);
+		arenas[oldind]->nthreads--;
+		arenas[newind]->nthreads++;
 		malloc_mutex_unlock(&arenas_lock);
 		if (arena == NULL) {
 			ret = EAGAIN;
@@ -1536,6 +1548,7 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
 }
 #endif
 
+CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned)
 CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
 CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
 #ifdef JEMALLOC_STATS
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index 9f2fa92..ecd521c 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -7,12 +7,10 @@
 malloc_mutex_t		arenas_lock;
 arena_t			**arenas;
 unsigned		narenas;
-static unsigned		next_arena;
+pthread_key_t		arenas_tsd;
 #ifndef NO_TLS
 __thread arena_t	*arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
-#else
-pthread_key_t		arenas_tsd;
 #endif
 
 #ifdef JEMALLOC_STATS
@@ -70,6 +68,7 @@ size_t	opt_narenas = 0;
 static void	wrtmessage(void *cbopaque, const char *s);
 static void	stats_print_atexit(void);
 static unsigned	malloc_ncpus(void);
+static void	arenas_cleanup(void *arg);
 #if (defined(JEMALLOC_STATS) && defined(NO_TLS))
 static void	thread_allocated_cleanup(void *arg);
 #endif
@@ -147,13 +146,53 @@ choose_arena_hard(void)
 	arena_t *ret;
 
 	if (narenas > 1) {
+		unsigned i, choose, first_null;
+
+		choose = 0;
+		first_null = narenas;
 		malloc_mutex_lock(&arenas_lock);
-		if ((ret = arenas[next_arena]) == NULL)
-			ret = arenas_extend(next_arena);
-		next_arena = (next_arena + 1) % narenas;
+		assert(arenas[0] != NULL);
+		for (i = 1; i < narenas; i++) {
+			if (arenas[i] != NULL) {
+				/*
+				 * Choose the first arena that has the lowest
+				 * number of threads assigned to it.
+				 */
+				if (arenas[i]->nthreads <
+				    arenas[choose]->nthreads)
+					choose = i;
+			} else if (first_null == narenas) {
+				/*
+				 * Record the index of the first uninitialized
+				 * arena, in case all extant arenas are in use.
+				 *
+				 * NB: It is possible for there to be
+				 * discontinuities in terms of initialized
+				 * versus uninitialized arenas, due to the
+				 * "thread.arena" mallctl.
+				 */
+				first_null = i;
+			}
+		}
+
+		if (arenas[choose]->nthreads == 0 || first_null == narenas) {
+			/*
+			 * Use an unloaded arena, or the least loaded arena if
+			 * all arenas are already initialized.
+			 */
+			ret = arenas[choose];
+		} else {
+			/* Initialize a new arena. */
+			ret = arenas_extend(first_null);
+		}
+		ret->nthreads++;
 		malloc_mutex_unlock(&arenas_lock);
-	} else
+	} else {
 		ret = arenas[0];
+		malloc_mutex_lock(&arenas_lock);
+		ret->nthreads++;
+		malloc_mutex_unlock(&arenas_lock);
+	}
 
 	ARENA_SET(ret);
 
@@ -259,6 +298,16 @@ malloc_ncpus(void)
 	return (ret);
 }
 
+static void
+arenas_cleanup(void *arg)
+{
+	arena_t *arena = (arena_t *)arg;
+
+	malloc_mutex_lock(&arenas_lock);
+	arena->nthreads--;
+	malloc_mutex_unlock(&arenas_lock);
+}
+
 #if (defined(JEMALLOC_STATS) && defined(NO_TLS))
 static void
 thread_allocated_cleanup(void *arg)
@@ -737,6 +786,7 @@ malloc_init_hard(void)
 	 * threaded mode.
 	 */
 	ARENA_SET(arenas[0]);
+	arenas[0]->nthreads++;
 
 	if (malloc_mutex_init(&arenas_lock))
 		return (true);
@@ -779,14 +829,10 @@ malloc_init_hard(void)
 		malloc_write(")\n");
 	}
 
-	next_arena = (narenas > 0) ? 1 : 0;
-
-#ifdef NO_TLS
-	if (pthread_key_create(&arenas_tsd, NULL) != 0) {
+	if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) {
 		malloc_mutex_unlock(&init_lock);
 		return (true);
 	}
-#endif
 
 	/* Allocate and initialize arenas. */
 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
@@ -819,7 +865,6 @@ malloc_init_hard(void)
 	return (false);
 }
 
-
 #ifdef JEMALLOC_ZONE
 JEMALLOC_ATTR(constructor)
 void
diff --git a/jemalloc/src/stats.c b/jemalloc/src/stats.c
index 3dfe0d2..81105c4 100644
--- a/jemalloc/src/stats.c
+++ b/jemalloc/src/stats.c
@@ -319,6 +319,7 @@ static void
 stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
     unsigned i)
 {
+	unsigned nthreads;
 	size_t pagesize, pactive, pdirty, mapped;
 	uint64_t npurge, nmadvise, purged;
 	size_t small_allocated;
@@ -328,6 +329,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
 
 	CTL_GET("arenas.pagesize", &pagesize, size_t);
 
+	CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned);
+	malloc_cprintf(write_cb, cbopaque,
+	    "assigned threads: %u\n", nthreads);
 	CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t);
 	CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t);
 	CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t);
-- 
cgit v0.12
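
The selection policy the commit message describes can be read directly out of
the new choose_arena_hard() loop: a single pass over arenas[] tracks both the
least loaded initialized arena and the first NULL slot. The standalone sketch
below restates that loop outside jemalloc; arena_t is pared down to the one
field the loop inspects, choose_arena_index() is an invented name, and the
caller is assumed to hold arenas_lock.

	#include <assert.h>
	#include <stddef.h>
	#include <stdio.h>

	/* Pared-down stand-in for jemalloc's arena_t. */
	typedef struct {
		unsigned nthreads;	/* Threads currently assigned. */
	} arena_t;

	/*
	 * Return the slot a new thread should be assigned to: the
	 * lowest-numbered arena with the fewest assigned threads, unless no
	 * arena is idle and an uninitialized (NULL) slot exists, in which
	 * case that slot should be initialized and used instead.
	 */
	static unsigned
	choose_arena_index(arena_t **arenas, unsigned narenas)
	{
		unsigned i, choose = 0, first_null = narenas;

		assert(narenas > 1 && arenas[0] != NULL);
		for (i = 1; i < narenas; i++) {
			if (arenas[i] != NULL) {
				/* Strict < keeps the lowest index on ties. */
				if (arenas[i]->nthreads <
				    arenas[choose]->nthreads)
					choose = i;
			} else if (first_null == narenas) {
				/* Remember the first uninitialized slot. */
				first_null = i;
			}
		}

		if (arenas[choose]->nthreads == 0 || first_null == narenas) {
			/* An arena is idle, or every slot is initialized. */
			return (choose);
		}
		/* All extant arenas are busy; prefer a fresh slot. */
		return (first_null);
	}

	int
	main(void)
	{
		arena_t a0 = {2}, a1 = {1};
		arena_t *arenas[3] = {&a0, &a1, NULL};

		/* a1 is least loaded but busy, and slot 2 is NULL: prints 2. */
		printf("chose slot %u\n", choose_arena_index(arenas, 3));
		return (0);
	}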
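
The companion change, passing arenas_cleanup to pthread_key_create() instead
of NULL, is what keeps nthreads accurate: the destructor runs at thread exit
for any thread whose key value is non-NULL, which is also why ARENA_SET() now
calls pthread_setspecific() even in the TLS build. A minimal demonstration of
that mechanism, with invented names standing in for the arena bookkeeping:

	#include <pthread.h>
	#include <stdio.h>

	static pthread_key_t key;
	static unsigned nthreads_model = 0;	/* Stands in for arena->nthreads. */

	static void
	cleanup(void *arg)
	{
		/* Runs in the exiting thread, like arenas_cleanup(). */
		unsigned *count = (unsigned *)arg;

		(*count)--;
	}

	static void *
	thread_main(void *arg)
	{
		(void)arg;
		nthreads_model++;	/* "Assign" this thread. */
		pthread_setspecific(key, &nthreads_model);
		return (NULL);		/* Thread exit fires cleanup(). */
	}

	int
	main(void)
	{
		pthread_t thd;

		pthread_key_create(&key, cleanup);
		pthread_create(&thd, NULL, thread_main, NULL);
		pthread_join(thd, NULL);
		/* The destructor has run by now; prints 0. */
		printf("threads still assigned: %u\n", nthreads_model);
		return (0);
	}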
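
Finally, the new statistic is reachable through the usual mallctl interface.
A sketch of a reader, assuming a build that exports an unprefixed mallctl()
(prefixed builds of this era spell it via JEMALLOC_P()) and that statistics
are compiled in:

	#include <stdint.h>
	#include <stdio.h>
	#include <jemalloc/jemalloc.h>

	int
	main(void)
	{
		uint64_t epoch = 1;
		size_t esz = sizeof(epoch);
		unsigned nthreads;
		size_t sz = sizeof(nthreads);

		/* Advance the epoch so the stats snapshot is current. */
		mallctl("epoch", &epoch, &esz, &epoch, esz);
		if (mallctl("stats.arenas.0.nthreads", &nthreads, &sz,
		    NULL, 0) == 0)
			printf("arena 0: %u assigned thread(s)\n", nthreads);
		return (0);
	}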