Diffstat (limited to 'src')
-rw-r--r--  src/arena.c     10
-rw-r--r--  src/ctl.c       18
-rw-r--r--  src/jemalloc.c 139
-rw-r--r--  src/stats.c      1
-rw-r--r--  src/tcache.c    11
5 files changed, 150 insertions(+), 29 deletions(-)
diff --git a/src/arena.c b/src/arena.c
index 43bad81..a3a1fdd 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -4,6 +4,15 @@
 /******************************************************************************/
 /* Data. */
 
+const char *percpu_arena_mode_names[] = {
+	"disabled",
+	"percpu",
+	"phycpu"
+};
+
+const char *opt_percpu_arena = OPT_PERCPU_ARENA_DEFAULT;
+percpu_arena_mode_t percpu_arena_mode = PERCPU_ARENA_MODE_DEFAULT;
+
 ssize_t opt_decay_time = DECAY_TIME_DEFAULT;
 static ssize_t decay_time_default;
 
@@ -1629,6 +1638,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
 	}
 
 	arena->nthreads[0] = arena->nthreads[1] = 0;
+	arena->last_thd = NULL;
 
 	if (config_stats) {
 		if (arena_stats_init(tsdn, &arena->stats)) {
diff --git a/src/ctl.c b/src/ctl.c
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -72,6 +72,7 @@ CTL_PROTO(config_xmalloc)
 CTL_PROTO(opt_abort)
 CTL_PROTO(opt_dss)
 CTL_PROTO(opt_narenas)
+CTL_PROTO(opt_percpu_arena)
 CTL_PROTO(opt_decay_time)
 CTL_PROTO(opt_stats_print)
 CTL_PROTO(opt_junk)
@@ -229,6 +230,7 @@ static const ctl_named_node_t opt_node[] = {
 	{NAME("abort"), CTL(opt_abort)},
 	{NAME("dss"), CTL(opt_dss)},
 	{NAME("narenas"), CTL(opt_narenas)},
+	{NAME("percpu_arena"), CTL(opt_percpu_arena)},
 	{NAME("decay_time"), CTL(opt_decay_time)},
 	{NAME("stats_print"), CTL(opt_stats_print)},
 	{NAME("junk"), CTL(opt_junk)},
@@ -1284,6 +1286,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
 CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
 CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
 CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)
+CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *)
 CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t)
 CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool)
 CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *)
@@ -1317,10 +1320,10 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
 	if (oldarena == NULL) {
 		return EAGAIN;
 	}
-
 	newind = oldind = arena_ind_get(oldarena);
 	WRITE(newind, unsigned);
 	READ(oldind, unsigned);
+
 	if (newind != oldind) {
 		arena_t *newarena;
 
@@ -1330,6 +1333,19 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
 			goto label_return;
 		}
 
+		if (have_percpu_arena &&
+		    (percpu_arena_mode != percpu_arena_disabled)) {
+			if (newind < percpu_arena_ind_limit()) {
+				/*
+				 * If perCPU arena is enabled, thread_arena
+				 * control is not allowed for the auto arena
+				 * range.
+				 */
+				ret = EPERM;
+				goto label_return;
+			}
+		}
+
 		/* Initialize arena if necessary. */
 		newarena = arena_get(tsd_tsdn(tsd), newind, true);
 		if (newarena == NULL) {
diff --git a/src/jemalloc.c b/src/jemalloc.c
index ecfecf9..ce84b3c 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -56,7 +56,8 @@ static malloc_mutex_t arenas_lock;
  * arenas.  arenas[narenas_auto..narenas_total) are only used if the application
 * takes some action to create them and allocate from them.
  */
-arena_t **arenas;
+JEMALLOC_ALIGNED(CACHELINE)
+arena_t *arenas[MALLOCX_ARENA_MAX + 1];
 static unsigned narenas_total; /* Use narenas_total_*(). */
 static arena_t *a0; /* arenas[0]; read-only after initialization. */
 unsigned narenas_auto; /* Read-only after initialization. */
@@ -543,6 +544,16 @@ arena_t *
 arena_choose_hard(tsd_t *tsd, bool internal) {
 	arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL);
 
+	if (have_percpu_arena && percpu_arena_mode != percpu_arena_disabled) {
+		unsigned choose = percpu_arena_choose();
+		ret = arena_get(tsd_tsdn(tsd), choose, true);
+		assert(ret != NULL);
+		arena_bind(tsd, arena_ind_get(ret), false);
+		arena_bind(tsd, arena_ind_get(ret), true);
+
+		return ret;
+	}
+
 	if (narenas_auto > 1) {
 		unsigned i, j, choose[2], first_null;
 
@@ -1095,6 +1106,30 @@ malloc_conf_init(void) {
 			    "lg_tcache_max", -1,
 			    (sizeof(size_t) << 3) - 1)
 			}
+			if (strncmp("percpu_arena", k, klen) == 0) {
+				int i;
+				bool match = false;
+				for (i = 0; i < percpu_arena_mode_limit; i++) {
+					if (strncmp(percpu_arena_mode_names[i],
+					    v, vlen) == 0) {
+						if (!have_percpu_arena) {
+							malloc_conf_error(
+							    "No getcpu support",
+							    k, klen, v, vlen);
+						}
+						percpu_arena_mode = i;
+						opt_percpu_arena =
+						    percpu_arena_mode_names[i];
+						match = true;
+						break;
+					}
+				}
+				if (!match) {
+					malloc_conf_error("Invalid conf value",
+					    k, klen, v, vlen);
+				}
+				continue;
+			}
 			if (config_prof) {
 				CONF_HANDLE_BOOL(opt_prof, "prof", true)
 				CONF_HANDLE_CHAR_P(opt_prof_prefix,
@@ -1204,8 +1239,6 @@ malloc_init_hard_a0_locked() {
 	 * malloc_ncpus().
 	 */
 	narenas_auto = 1;
-	narenas_total_set(narenas_auto);
-	arenas = &a0;
 	memset(arenas, 0, sizeof(arena_t *) * narenas_auto);
 	/*
 	 * Initialize one arena here.  The rest are lazily created in
@@ -1215,7 +1248,7 @@ malloc_init_hard_a0_locked() {
 	    == NULL) {
 		return true;
 	}
-
+	a0 = arena_get(TSDN_NULL, 0, false);
 	malloc_init_state = malloc_init_a0_initialized;
 
 	return false;
@@ -1255,23 +1288,76 @@ malloc_init_hard_recursible(void) {
 	return false;
 }
 
-static bool
-malloc_init_hard_finish(tsdn_t *tsdn) {
-	if (malloc_mutex_boot()) {
-		return true;
+static unsigned
+malloc_narenas_default(void) {
+	assert(ncpus > 0);
+	/*
+	 * For SMP systems, create more than one arena per CPU by
+	 * default.
+	 */
+	if (ncpus > 1) {
+		return ncpus << 2;
+	} else {
+		return 1;
 	}
+}
 
-	if (opt_narenas == 0) {
-		/*
-		 * For SMP systems, create more than one arena per CPU by
-		 * default.
-		 */
-		if (ncpus > 1) {
-			opt_narenas = ncpus << 2;
+static bool
+malloc_init_narenas(void) {
+	assert(ncpus > 0);
+
+	if (percpu_arena_mode != percpu_arena_disabled) {
+		if (!have_percpu_arena || malloc_getcpu() < 0) {
+			percpu_arena_mode = percpu_arena_disabled;
+			malloc_printf("<jemalloc>: perCPU arena getcpu() not "
+			    "available. Setting narenas to %u.\n", opt_narenas ?
+			    opt_narenas : malloc_narenas_default());
+			if (opt_abort) {
+				abort();
+			}
 		} else {
-			opt_narenas = 1;
+			if (ncpus > MALLOCX_ARENA_MAX) {
+				malloc_printf("<jemalloc>: narenas w/ percpu"
+				    "arena beyond limit (%d)\n", ncpus);
+				if (opt_abort) {
+					abort();
+				}
+				return true;
+			}
+			if ((percpu_arena_mode == per_phycpu_arena) &&
+			    (ncpus % 2 != 0)) {
+				malloc_printf("<jemalloc>: invalid "
+				    "configuration -- per physical CPU arena "
+				    "with odd number (%u) of CPUs (no hyper "
+				    "threading?).\n", ncpus);
+				if (opt_abort)
+					abort();
+			}
+			unsigned n = percpu_arena_ind_limit();
+			if (opt_narenas < n) {
+				/*
+				 * If narenas is specified with percpu_arena
+				 * enabled, actual narenas is set as the greater
+				 * of the two. percpu_arena_choose will be free
+				 * to use any of the arenas based on CPU
+				 * id. This is conservative (at a small cost)
+				 * but ensures correctness.
+				 *
+				 * If for some reason the ncpus determined at
+				 * boot is not the actual number (e.g. because
+				 * of affinity setting from numactl), reserving
+				 * narenas this way provides a workaround for
+				 * percpu_arena.
+				 */
+				opt_narenas = n;
+			}
 		}
 	}
+	if (opt_narenas == 0) {
+		opt_narenas = malloc_narenas_default();
+	}
+	assert(opt_narenas > 0);
+
 	narenas_auto = opt_narenas;
 	/*
 	 * Limit the number of arenas to the indexing range of MALLOCX_ARENA().
@@ -1283,14 +1369,13 @@ malloc_init_hard_finish(tsdn_t *tsdn) {
 	}
 	narenas_total_set(narenas_auto);
 
-	/* Allocate and initialize arenas. */
-	arenas = (arena_t **)base_alloc(tsdn, a0->base, sizeof(arena_t *) *
-	    (MALLOCX_ARENA_MAX+1), CACHELINE);
-	if (arenas == NULL) {
+	return false;
+}
+
+static bool
+malloc_init_hard_finish(void) {
+	if (malloc_mutex_boot())
 		return true;
-	}
-	/* Copy the pointer to the one arena that was already initialized. */
-	arena_set(0, a0);
 
 	malloc_init_state = malloc_init_initialized;
 	malloc_slow_flag_init();
@@ -1328,12 +1413,18 @@ malloc_init_hard(void) {
 	}
 	malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
 
+	/* Need this before prof_boot2 (for allocation). */
+	if (malloc_init_narenas()) {
+		malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
+		return true;
+	}
+
 	if (config_prof && prof_boot2(tsd)) {
 		malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
 		return true;
 	}
 
-	if (malloc_init_hard_finish(tsd_tsdn(tsd))) {
+	if (malloc_init_hard_finish()) {
 		malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
 		return true;
 	}
diff --git a/src/stats.c b/src/stats.c
index ae360e1..776fb86 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -621,6 +621,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
 	OPT_WRITE_BOOL(abort, ",")
 	OPT_WRITE_CHAR_P(dss, ",")
 	OPT_WRITE_UNSIGNED(narenas, ",")
+	OPT_WRITE_CHAR_P(percpu_arena, ",")
 	OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",")
 	OPT_WRITE_CHAR_P(junk, ",")
 	OPT_WRITE_BOOL(zero, ",")
diff --git a/src/tcache.c b/src/tcache.c
index 7857066..266bd1f 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -357,12 +357,8 @@ tcache_create(tsdn_t *tsdn, arena_t *arena) {
 
 static void
 tcache_destroy(tsd_t *tsd, tcache_t *tcache) {
-	arena_t *arena;
 	unsigned i;
 
-	arena = arena_choose(tsd, NULL);
-	tcache_arena_dissociate(tsd_tsdn(tsd), tcache);
-
 	for (i = 0; i < NBINS; i++) {
 		tcache_bin_t *tbin = &tcache->tbins[i];
 		tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
@@ -381,6 +377,13 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) {
 		}
 	}
 
+	/*
+	 * Get arena after flushing -- when using percpu arena, the associated
+	 * arena could change during flush.
+	 */
+	arena_t *arena = arena_choose(tsd, NULL);
+	tcache_arena_dissociate(tsd_tsdn(tsd), tcache);
+
 	if (config_prof && tcache->prof_accumbytes > 0 &&
 	    arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) {
 		prof_idump(tsd_tsdn(tsd));