author:    Jason Evans <je@fb.com>  2012-10-11 20:53:15 (GMT)
committer: Jason Evans <je@fb.com>  2012-10-13 01:26:16 (GMT)
commit:    609ae595f0358157b19311b0f9f9591db7cee705
tree:      2a51758c6218f26167eb8ad2155ccc925c898c3b /src/jemalloc.c
parent:    d0ffd8ed4f6aa4cf7248028eddfcb35f93247fe4
Add arena-specific and selective dss allocation.
Add the "arenas.extend" mallctl, so that it is possible to create new
arenas that are outside the set that jemalloc automatically multiplexes
threads onto.
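
For illustration (not part of this commit), a new arena could be requested from application code roughly as follows; mallctl() and the "arenas.extend" name are jemalloc's public control interface, while the surrounding program is a minimal sketch.

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        /* "arenas.extend" creates an arena and reports its index via oldp. */
        unsigned arena_ind;
        size_t sz = sizeof(arena_ind);

        if (mallctl("arenas.extend", &arena_ind, &sz, NULL, 0) != 0) {
            fprintf(stderr, "arenas.extend failed\n");
            return (1);
        }
        printf("created arena %u\n", arena_ind);
        return (0);
    }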
Add the ALLOCM_ARENA() flag for {,r,d}allocm(), so that it is possible
to explicitly allocate from a particular arena.
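
For illustration (not part of this commit), pairing the two features might look like the sketch below; allocm(), dallocm(), ALLOCM_ARENA() and ALLOCM_SUCCESS come from the experimental API, and arena_ind is assumed to have been obtained via "arenas.extend" as shown above.

    #include <stddef.h>
    #include <jemalloc/jemalloc.h>

    /* Allocate size bytes explicitly from the given arena (sketch; requires a
     * jemalloc build that exposes the experimental *allocm() API). */
    static void *
    alloc_from_arena(unsigned arena_ind, size_t size)
    {
        void *p;

        if (allocm(&p, NULL, size, ALLOCM_ARENA(arena_ind)) != ALLOCM_SUCCESS)
            return (NULL);
        return (p);
    }

    /* Release it through the same arena-aware path. */
    static void
    dalloc_from_arena(void *p, unsigned arena_ind)
    {
        dallocm(p, ALLOCM_ARENA(arena_ind));
    }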
Add the "opt.dss" mallctl, which controls the default precedence of dss
allocation relative to mmap allocation.
Add the "arena.<i>.dss" mallctl, which makes it possible to set the
default dss precedence on a per arena or global basis.
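
For illustration (not part of this commit), dss precedence could be changed at run time through the new per-arena mallctl; the accepted value strings are assumed to follow dss_prec_names (e.g. "disabled", "primary", "secondary"), and the process-wide default can likewise be chosen at startup via the new "dss" option in MALLOC_CONF.

    #include <jemalloc/jemalloc.h>

    /* Make arena 0 prefer dss (sbrk) over mmap for new chunks.  The value
     * string "primary" is an assumption based on dss_prec_names. */
    static int
    arena0_prefer_dss(void)
    {
        const char *dss = "primary";

        return (mallctl("arena.0.dss", NULL, NULL, (void *)&dss, sizeof(dss)));
    }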
Add the "arena.<i>.purge" mallctl, which obsoletes "arenas.purge".
Add the "stats.arenas.<i>.dss" mallctl.
Diffstat (limited to 'src/jemalloc.c')
-rw-r--r-- | src/jemalloc.c | 130
1 file changed, 99 insertions, 31 deletions
diff --git a/src/jemalloc.c b/src/jemalloc.c
index b2daa30..8a667b6 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -33,7 +33,8 @@ unsigned ncpus;
 
 malloc_mutex_t arenas_lock;
 arena_t **arenas;
-unsigned narenas;
+unsigned narenas_total;
+unsigned narenas_auto;
 
 /* Set to true once the allocator has been initialized. */
 static bool malloc_initialized = false;
@@ -144,14 +145,14 @@ choose_arena_hard(void)
 {
     arena_t *ret;
 
-    if (narenas > 1) {
+    if (narenas_auto > 1) {
         unsigned i, choose, first_null;
 
         choose = 0;
-        first_null = narenas;
+        first_null = narenas_auto;
         malloc_mutex_lock(&arenas_lock);
         assert(arenas[0] != NULL);
-        for (i = 1; i < narenas; i++) {
+        for (i = 1; i < narenas_auto; i++) {
             if (arenas[i] != NULL) {
                 /*
                  * Choose the first arena that has the lowest
@@ -160,7 +161,7 @@ choose_arena_hard(void)
                 if (arenas[i]->nthreads <
                     arenas[choose]->nthreads)
                     choose = i;
-            } else if (first_null == narenas) {
+            } else if (first_null == narenas_auto) {
                 /*
                  * Record the index of the first uninitialized
                  * arena, in case all extant arenas are in use.
@@ -174,7 +175,8 @@ choose_arena_hard(void)
             }
         }
 
-        if (arenas[choose]->nthreads == 0 || first_null == narenas) {
+        if (arenas[choose]->nthreads == 0
+            || first_null == narenas_auto) {
             /*
              * Use an unloaded arena, or the least loaded arena if
              * all arenas are already initialized.
@@ -203,7 +205,7 @@ stats_print_atexit(void)
 {
 
     if (config_tcache && config_stats) {
-        unsigned i;
+        unsigned narenas, i;
 
         /*
          * Merge stats from extant threads. This is racy, since
@@ -212,7 +214,7 @@ stats_print_atexit(void)
          * out of date by the time they are reported, if other threads
          * continue to allocate.
          */
-        for (i = 0; i < narenas; i++) {
+        for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
            arena_t *arena = arenas[i];
            if (arena != NULL) {
                tcache_t *tcache;
@@ -554,6 +556,30 @@ malloc_conf_init(void)
              */
             CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE +
                 (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1)
+            if (strncmp("dss", k, klen) == 0) {
+                int i;
+                bool match = false;
+                for (i = 0; i < dss_prec_limit; i++) {
+                    if (strncmp(dss_prec_names[i], v, vlen)
+                        == 0) {
+                        if (chunk_dss_prec_set(i)) {
+                            malloc_conf_error(
+                                "Error setting dss",
+                                k, klen, v, vlen);
+                        } else {
+                            opt_dss =
+                                dss_prec_names[i];
+                            match = true;
+                            break;
+                        }
+                    }
+                }
+                if (match == false) {
+                    malloc_conf_error("Invalid conf value",
+                        k, klen, v, vlen);
+                }
+                continue;
+            }
             CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1,
                 SIZE_T_MAX)
             CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult",
@@ -699,9 +725,9 @@ malloc_init_hard(void)
      * Create enough scaffolding to allow recursive allocation in
      * malloc_ncpus().
      */
-    narenas = 1;
+    narenas_total = narenas_auto = 1;
     arenas = init_arenas;
-    memset(arenas, 0, sizeof(arena_t *) * narenas);
+    memset(arenas, 0, sizeof(arena_t *) * narenas_auto);
 
     /*
      * Initialize one arena here. The rest are lazily created in
@@ -759,20 +785,21 @@ malloc_init_hard(void)
         else
             opt_narenas = 1;
     }
-    narenas = opt_narenas;
+    narenas_auto = opt_narenas;
     /*
      * Make sure that the arenas array can be allocated. In practice, this
      * limit is enough to allow the allocator to function, but the ctl
      * machinery will fail to allocate memory at far lower limits.
      */
-    if (narenas > chunksize / sizeof(arena_t *)) {
-        narenas = chunksize / sizeof(arena_t *);
+    if (narenas_auto > chunksize / sizeof(arena_t *)) {
+        narenas_auto = chunksize / sizeof(arena_t *);
         malloc_printf("<jemalloc>: Reducing narenas to limit (%d)\n",
-            narenas);
+            narenas_auto);
     }
+    narenas_total = narenas_auto;
 
     /* Allocate and initialize arenas. */
-    arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
+    arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total);
     if (arenas == NULL) {
         malloc_mutex_unlock(&init_lock);
         return (true);
@@ -781,7 +808,7 @@ malloc_init_hard(void)
      * Zero the array. In practice, this should always be pre-zeroed,
      * since it was just mmap()ed, but let's be sure.
      */
-    memset(arenas, 0, sizeof(arena_t *) * narenas);
+    memset(arenas, 0, sizeof(arena_t *) * narenas_total);
 
     /* Copy the pointer to the one arena that was already initialized. */
     arenas[0] = init_arenas[0];
@@ -1346,18 +1373,19 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 #ifdef JEMALLOC_EXPERIMENTAL
 
 JEMALLOC_INLINE void *
-iallocm(size_t usize, size_t alignment, bool zero)
+iallocm(size_t usize, size_t alignment, bool zero, bool try_tcache,
+    arena_t *arena)
 {
 
     assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize,
         alignment)));
 
     if (alignment != 0)
-        return (ipalloc(usize, alignment, zero));
+        return (ipallocx(usize, alignment, zero, try_tcache, arena));
     else if (zero)
-        return (icalloc(usize));
+        return (icallocx(usize, try_tcache, arena));
     else
-        return (imalloc(usize));
+        return (imallocx(usize, try_tcache, arena));
 }
 
 int
@@ -1368,6 +1396,9 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags)
     size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK)
         & (SIZE_T_MAX-1));
     bool zero = flags & ALLOCM_ZERO;
+    unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
+    arena_t *arena;
+    bool try_tcache;
 
     assert(ptr != NULL);
     assert(size != 0);
@@ -1375,6 +1406,14 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags)
     if (malloc_init())
         goto label_oom;
 
+    if (arena_ind != UINT_MAX) {
+        arena = arenas[arena_ind];
+        try_tcache = false;
+    } else {
+        arena = NULL;
+        try_tcache = true;
+    }
+
     usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
     if (usize == 0)
         goto label_oom;
@@ -1391,18 +1430,19 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags)
                 s2u(SMALL_MAXCLASS+1) :
                 sa2u(SMALL_MAXCLASS+1, alignment);
             assert(usize_promoted != 0);
-            p = iallocm(usize_promoted, alignment, zero);
+            p = iallocm(usize_promoted, alignment, zero,
+                try_tcache, arena);
             if (p == NULL)
                 goto label_oom;
             arena_prof_promoted(p, usize);
         } else {
-            p = iallocm(usize, alignment, zero);
+            p = iallocm(usize, alignment, zero, try_tcache, arena);
             if (p == NULL)
                 goto label_oom;
         }
         prof_malloc(p, usize, cnt);
     } else {
-        p = iallocm(usize, alignment, zero);
+        p = iallocm(usize, alignment, zero, try_tcache, arena);
         if (p == NULL)
             goto label_oom;
     }
@@ -1439,6 +1479,9 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags)
         & (SIZE_T_MAX-1));
     bool zero = flags & ALLOCM_ZERO;
     bool no_move = flags & ALLOCM_NO_MOVE;
+    unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
+    bool try_tcache_alloc, try_tcache_dalloc;
+    arena_t *arena;
 
     assert(ptr != NULL);
     assert(*ptr != NULL);
@@ -1446,6 +1489,19 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags)
     assert(SIZE_T_MAX - size >= extra);
     assert(malloc_initialized || IS_INITIALIZER);
 
+    if (arena_ind != UINT_MAX) {
+        arena_chunk_t *chunk;
+        try_tcache_alloc = true;
+        chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(*ptr);
+        try_tcache_dalloc = (chunk == *ptr || chunk->arena !=
+            arenas[arena_ind]);
+        arena = arenas[arena_ind];
+    } else {
+        try_tcache_alloc = true;
+        try_tcache_dalloc = true;
+        arena = NULL;
+    }
+
     p = *ptr;
     if (config_prof && opt_prof) {
         prof_thr_cnt_t *cnt;
@@ -1472,9 +1528,10 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags)
         if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U &&
             ((alignment == 0) ? s2u(size) : sa2u(size, alignment)) <=
             SMALL_MAXCLASS) {
-            q = iralloc(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
+            q = irallocx(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
                 size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1),
-                alignment, zero, no_move);
+                alignment, zero, no_move, try_tcache_alloc,
+                try_tcache_dalloc, arena);
             if (q == NULL)
                 goto label_err;
             if (max_usize < PAGE) {
@@ -1483,7 +1540,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags)
             } else
                 usize = isalloc(q, config_prof);
         } else {
-            q = iralloc(p, size, extra, alignment, zero, no_move);
+            q = irallocx(p, size, extra, alignment, zero, no_move,
+                try_tcache_alloc, try_tcache_dalloc, arena);
             if (q == NULL)
                 goto label_err;
             usize = isalloc(q, config_prof);
@@ -1500,7 +1558,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags)
             old_size = isalloc(p, false);
             old_rzsize = u2rz(old_size);
         }
-        q = iralloc(p, size, extra, alignment, zero, no_move);
+        q = irallocx(p, size, extra, alignment, zero, no_move,
+            try_tcache_alloc, try_tcache_dalloc, arena);
         if (q == NULL)
             goto label_err;
         if (config_stats)
@@ -1561,10 +1620,19 @@ je_dallocm(void *ptr, int flags)
 {
     size_t usize;
     size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
+    unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
+    bool try_tcache;
 
     assert(ptr != NULL);
     assert(malloc_initialized || IS_INITIALIZER);
 
+    if (arena_ind != UINT_MAX) {
+        arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+        try_tcache = (chunk == ptr || chunk->arena !=
+            arenas[arena_ind]);
+    } else
+        try_tcache = true;
+
     UTRACE(ptr, 0, 0);
     if (config_stats || config_valgrind)
         usize = isalloc(ptr, config_prof);
@@ -1577,7 +1645,7 @@ je_dallocm(void *ptr, int flags)
         thread_allocated_tsd_get()->deallocated += usize;
     if (config_valgrind && opt_valgrind)
         rzsize = p2rz(ptr);
-    iqalloc(ptr);
+    iqallocx(ptr, try_tcache);
     JEMALLOC_VALGRIND_FREE(ptr, rzsize);
 
     return (ALLOCM_SUCCESS);
@@ -1654,7 +1722,7 @@ _malloc_prefork(void)
     /* Acquire all mutexes in a safe order. */
     ctl_prefork();
     malloc_mutex_prefork(&arenas_lock);
-    for (i = 0; i < narenas; i++) {
+    for (i = 0; i < narenas_total; i++) {
         if (arenas[i] != NULL)
             arena_prefork(arenas[i]);
     }
@@ -1685,7 +1753,7 @@ _malloc_postfork(void)
     base_postfork_parent();
     chunk_postfork_parent();
     prof_postfork_parent();
-    for (i = 0; i < narenas; i++) {
+    for (i = 0; i < narenas_total; i++) {
         if (arenas[i] != NULL)
             arena_postfork_parent(arenas[i]);
     }
@@ -1705,7 +1773,7 @@ jemalloc_postfork_child(void)
     base_postfork_child();
     chunk_postfork_child();
    prof_postfork_child();
-    for (i = 0; i < narenas; i++) {
+    for (i = 0; i < narenas_total; i++) {
        if (arenas[i] != NULL)
            arena_postfork_child(arenas[i]);
    }
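
A note on the flag decoding used by je_allocm(), je_rallocm() and je_dallocm() above: the arena index travels in the upper bits of the flags word. The small self-check below restates that relationship; the real ALLOCM_ARENA() definition lives in the public header (not shown in this diff) and is assumed here to encode the index as ((a + 1) << 8), so EXAMPLE_ALLOCM_ARENA is a hypothetical stand-in.

    #include <assert.h>
    #include <limits.h>

    /* Assumed encoding (stand-in for the real macro, which is not in this
     * diff): shift the index up by 8 bits, biased by one so that zero means
     * "no explicit arena". */
    #define EXAMPLE_ALLOCM_ARENA(a) ((int)(((a) + 1) << 8))

    int
    main(void)
    {
        int flags = EXAMPLE_ALLOCM_ARENA(3);
        /* Decode exactly as je_allocm() does above. */
        unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;

        assert(arena_ind == 3);
        /* With no arena bits set the decode wraps to UINT_MAX, which is why
         * the functions test arena_ind != UINT_MAX before using arenas[]. */
        assert((((unsigned)(0 >> 8)) - 1) == UINT_MAX);
        return (0);
    }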