summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--  src/arena.c    |  10
-rw-r--r--  src/ctl.c      |  18
-rw-r--r--  src/jemalloc.c | 139
-rw-r--r--  src/stats.c    |   1
-rw-r--r--  src/tcache.c   |  11
5 files changed, 150 insertions(+), 29 deletions(-)
diff --git a/src/arena.c b/src/arena.c
index 43bad81..a3a1fdd 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -4,6 +4,15 @@
/******************************************************************************/
/* Data. */
+const char *percpu_arena_mode_names[] = {
+ "disabled",
+ "percpu",
+ "phycpu"
+};
+
+const char *opt_percpu_arena = OPT_PERCPU_ARENA_DEFAULT;
+percpu_arena_mode_t percpu_arena_mode = PERCPU_ARENA_MODE_DEFAULT;
+
ssize_t opt_decay_time = DECAY_TIME_DEFAULT;
static ssize_t decay_time_default;
@@ -1629,6 +1638,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
}
arena->nthreads[0] = arena->nthreads[1] = 0;
+ arena->last_thd = NULL;
if (config_stats) {
if (arena_stats_init(tsdn, &arena->stats)) {
diff --git a/src/ctl.c b/src/ctl.c
index 831877b..d4ab699 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -72,6 +72,7 @@ CTL_PROTO(config_xmalloc)
CTL_PROTO(opt_abort)
CTL_PROTO(opt_dss)
CTL_PROTO(opt_narenas)
+CTL_PROTO(opt_percpu_arena)
CTL_PROTO(opt_decay_time)
CTL_PROTO(opt_stats_print)
CTL_PROTO(opt_junk)
@@ -229,6 +230,7 @@ static const ctl_named_node_t opt_node[] = {
{NAME("abort"), CTL(opt_abort)},
{NAME("dss"), CTL(opt_dss)},
{NAME("narenas"), CTL(opt_narenas)},
+ {NAME("percpu_arena"), CTL(opt_percpu_arena)},
{NAME("decay_time"), CTL(opt_decay_time)},
{NAME("stats_print"), CTL(opt_stats_print)},
{NAME("junk"), CTL(opt_junk)},
@@ -1284,6 +1286,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)
+CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *)
CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t)
CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool)
CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *)
@@ -1317,10 +1320,10 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
if (oldarena == NULL) {
return EAGAIN;
}
-
newind = oldind = arena_ind_get(oldarena);
WRITE(newind, unsigned);
READ(oldind, unsigned);
+
if (newind != oldind) {
arena_t *newarena;
@@ -1330,6 +1333,19 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
goto label_return;
}
+ if (have_percpu_arena &&
+ (percpu_arena_mode != percpu_arena_disabled)) {
+ if (newind < percpu_arena_ind_limit()) {
+ /*
+ * If perCPU arena is enabled, thread_arena
+ * control is not allowed for the auto arena
+ * range.
+ */
+ ret = EPERM;
+ goto label_return;
+ }
+ }
+
/* Initialize arena if necessary. */
newarena = arena_get(tsd_tsdn(tsd), newind, true);
if (newarena == NULL) {
diff --git a/src/jemalloc.c b/src/jemalloc.c
index ecfecf9..ce84b3c 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -56,7 +56,8 @@ static malloc_mutex_t arenas_lock;
* arenas. arenas[narenas_auto..narenas_total) are only used if the application
* takes some action to create them and allocate from them.
*/
-arena_t **arenas;
+JEMALLOC_ALIGNED(CACHELINE)
+arena_t *arenas[MALLOCX_ARENA_MAX + 1];
static unsigned narenas_total; /* Use narenas_total_*(). */
static arena_t *a0; /* arenas[0]; read-only after initialization. */
unsigned narenas_auto; /* Read-only after initialization. */
@@ -543,6 +544,16 @@ arena_t *
arena_choose_hard(tsd_t *tsd, bool internal) {
arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL);
+ if (have_percpu_arena && percpu_arena_mode != percpu_arena_disabled) {
+ unsigned choose = percpu_arena_choose();
+ ret = arena_get(tsd_tsdn(tsd), choose, true);
+ assert(ret != NULL);
+ arena_bind(tsd, arena_ind_get(ret), false);
+ arena_bind(tsd, arena_ind_get(ret), true);
+
+ return ret;
+ }
+
if (narenas_auto > 1) {
unsigned i, j, choose[2], first_null;
@@ -1095,6 +1106,30 @@ malloc_conf_init(void) {
"lg_tcache_max", -1,
(sizeof(size_t) << 3) - 1)
}
+ if (strncmp("percpu_arena", k, klen) == 0) {
+ int i;
+ bool match = false;
+ for (i = 0; i < percpu_arena_mode_limit; i++) {
+ if (strncmp(percpu_arena_mode_names[i],
+ v, vlen) == 0) {
+ if (!have_percpu_arena) {
+ malloc_conf_error(
+ "No getcpu support",
+ k, klen, v, vlen);
+ }
+ percpu_arena_mode = i;
+ opt_percpu_arena =
+ percpu_arena_mode_names[i];
+ match = true;
+ break;
+ }
+ }
+ if (!match) {
+ malloc_conf_error("Invalid conf value",
+ k, klen, v, vlen);
+ }
+ continue;
+ }
if (config_prof) {
CONF_HANDLE_BOOL(opt_prof, "prof", true)
CONF_HANDLE_CHAR_P(opt_prof_prefix,
@@ -1204,8 +1239,6 @@ malloc_init_hard_a0_locked() {
* malloc_ncpus().
*/
narenas_auto = 1;
- narenas_total_set(narenas_auto);
- arenas = &a0;
memset(arenas, 0, sizeof(arena_t *) * narenas_auto);
/*
* Initialize one arena here. The rest are lazily created in
@@ -1215,7 +1248,7 @@ malloc_init_hard_a0_locked() {
== NULL) {
return true;
}
-
+ a0 = arena_get(TSDN_NULL, 0, false);
malloc_init_state = malloc_init_a0_initialized;
return false;
@@ -1255,23 +1288,76 @@ malloc_init_hard_recursible(void) {
return false;
}
-static bool
-malloc_init_hard_finish(tsdn_t *tsdn) {
- if (malloc_mutex_boot()) {
- return true;
+static unsigned
+malloc_narenas_default(void) {
+ assert(ncpus > 0);
+ /*
+ * For SMP systems, create more than one arena per CPU by
+ * default.
+ */
+ if (ncpus > 1) {
+ return ncpus << 2;
+ } else {
+ return 1;
}
+}
- if (opt_narenas == 0) {
- /*
- * For SMP systems, create more than one arena per CPU by
- * default.
- */
- if (ncpus > 1) {
- opt_narenas = ncpus << 2;
+static bool
+malloc_init_narenas(void) {
+ assert(ncpus > 0);
+
+ if (percpu_arena_mode != percpu_arena_disabled) {
+ if (!have_percpu_arena || malloc_getcpu() < 0) {
+ percpu_arena_mode = percpu_arena_disabled;
+ malloc_printf("<jemalloc>: perCPU arena getcpu() not "
+ "available. Setting narenas to %u.\n", opt_narenas ?
+ opt_narenas : malloc_narenas_default());
+ if (opt_abort) {
+ abort();
+ }
} else {
- opt_narenas = 1;
+ if (ncpus > MALLOCX_ARENA_MAX) {
+ malloc_printf("<jemalloc>: narenas w/ percpu "
+ "arena beyond limit (%u)\n", ncpus);
+ if (opt_abort) {
+ abort();
+ }
+ return true;
+ }
+ if ((percpu_arena_mode == per_phycpu_arena) &&
+ (ncpus % 2 != 0)) {
+ malloc_printf("<jemalloc>: invalid "
+ "configuration -- per physical CPU arena "
+ "with odd number (%u) of CPUs (no hyper "
+ "threading?).\n", ncpus);
+ if (opt_abort)
+ abort();
+ }
+ unsigned n = percpu_arena_ind_limit();
+ if (opt_narenas < n) {
+ /*
+ * If narenas is specified with percpu_arena
+ * enabled, actual narenas is set as the greater
+ * of the two. percpu_arena_choose will be free
+ * to use any of the arenas based on CPU
+ * id. This is conservative (at a small cost)
+ * but ensures correctness.
+ *
+ * If for some reason the ncpus determined at
+ * boot is not the actual number (e.g. because
+ * of affinity setting from numactl), reserving
+ * narenas this way provides a workaround for
+ * percpu_arena.
+ */
+ opt_narenas = n;
+ }
}
}
+ if (opt_narenas == 0) {
+ opt_narenas = malloc_narenas_default();
+ }
+ assert(opt_narenas > 0);
+
narenas_auto = opt_narenas;
/*
* Limit the number of arenas to the indexing range of MALLOCX_ARENA().
@@ -1283,14 +1369,13 @@ malloc_init_hard_finish(tsdn_t *tsdn) {
}
narenas_total_set(narenas_auto);
- /* Allocate and initialize arenas. */
- arenas = (arena_t **)base_alloc(tsdn, a0->base, sizeof(arena_t *) *
- (MALLOCX_ARENA_MAX+1), CACHELINE);
- if (arenas == NULL) {
+ return false;
+}
+
+static bool
+malloc_init_hard_finish(void) {
+ if (malloc_mutex_boot())
return true;
- }
- /* Copy the pointer to the one arena that was already initialized. */
- arena_set(0, a0);
malloc_init_state = malloc_init_initialized;
malloc_slow_flag_init();
@@ -1328,12 +1413,18 @@ malloc_init_hard(void) {
}
malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
+ /* Need this before prof_boot2 (for allocation). */
+ if (malloc_init_narenas()) {
+ malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
+ return true;
+ }
+
if (config_prof && prof_boot2(tsd)) {
malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
return true;
}
- if (malloc_init_hard_finish(tsd_tsdn(tsd))) {
+ if (malloc_init_hard_finish()) {
malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
return true;
}
diff --git a/src/stats.c b/src/stats.c
index ae360e1..776fb86 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -621,6 +621,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
OPT_WRITE_BOOL(abort, ",")
OPT_WRITE_CHAR_P(dss, ",")
OPT_WRITE_UNSIGNED(narenas, ",")
+ OPT_WRITE_CHAR_P(percpu_arena, ",")
OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",")
OPT_WRITE_CHAR_P(junk, ",")
OPT_WRITE_BOOL(zero, ",")
diff --git a/src/tcache.c b/src/tcache.c
index 7857066..266bd1f 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -357,12 +357,8 @@ tcache_create(tsdn_t *tsdn, arena_t *arena) {
static void
tcache_destroy(tsd_t *tsd, tcache_t *tcache) {
- arena_t *arena;
unsigned i;
- arena = arena_choose(tsd, NULL);
- tcache_arena_dissociate(tsd_tsdn(tsd), tcache);
-
for (i = 0; i < NBINS; i++) {
tcache_bin_t *tbin = &tcache->tbins[i];
tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
@@ -381,6 +377,13 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) {
}
}
+ /*
+ * Get arena after flushing -- when using percpu arena, the associated
+ * arena could change during flush.
+ */
+ arena_t *arena = arena_choose(tsd, NULL);
+ tcache_arena_dissociate(tsd_tsdn(tsd), tcache);
+
if (config_prof && tcache->prof_accumbytes > 0 &&
arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) {
prof_idump(tsd_tsdn(tsd));