diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/arena.c | 134 | ||||
-rw-r--r-- | src/base.c | 3 | ||||
-rw-r--r-- | src/chunk.c | 127 | ||||
-rw-r--r-- | src/chunk_dss.c | 37 | ||||
-rw-r--r-- | src/ctl.c | 340 | ||||
-rw-r--r-- | src/huge.c | 7 | ||||
-rw-r--r-- | src/jemalloc.c | 130 | ||||
-rw-r--r-- | src/stats.c | 10 | ||||
-rw-r--r-- | src/tcache.c | 4 |
9 files changed, 603 insertions, 189 deletions
diff --git a/src/arena.c b/src/arena.c index 674ffe9..1e6964a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -372,7 +372,7 @@ arena_chunk_alloc(arena_t *arena) zero = false; malloc_mutex_unlock(&arena->lock); chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, - false, &zero); + false, &zero, arena->dss_prec); malloc_mutex_lock(&arena->lock); if (chunk == NULL) return (NULL); @@ -1619,52 +1619,6 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, mapelm = arena_mapp_get(chunk, pageind); arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); } -void -arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats) -{ - unsigned i; - - malloc_mutex_lock(&arena->lock); - *nactive += arena->nactive; - *ndirty += arena->ndirty; - - astats->mapped += arena->stats.mapped; - astats->npurge += arena->stats.npurge; - astats->nmadvise += arena->stats.nmadvise; - astats->purged += arena->stats.purged; - astats->allocated_large += arena->stats.allocated_large; - astats->nmalloc_large += arena->stats.nmalloc_large; - astats->ndalloc_large += arena->stats.ndalloc_large; - astats->nrequests_large += arena->stats.nrequests_large; - - for (i = 0; i < nlclasses; i++) { - lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; - lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; - lstats[i].nrequests += arena->stats.lstats[i].nrequests; - lstats[i].curruns += arena->stats.lstats[i].curruns; - } - malloc_mutex_unlock(&arena->lock); - - for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - - malloc_mutex_lock(&bin->lock); - bstats[i].allocated += bin->stats.allocated; - bstats[i].nmalloc += bin->stats.nmalloc; - bstats[i].ndalloc += bin->stats.ndalloc; - bstats[i].nrequests += bin->stats.nrequests; - if (config_tcache) { - bstats[i].nfills += bin->stats.nfills; - bstats[i].nflushes += bin->stats.nflushes; - } - bstats[i].nruns += bin->stats.nruns; - bstats[i].reruns += bin->stats.reruns; - bstats[i].curruns += bin->stats.curruns; - malloc_mutex_unlock(&bin->lock); - } -} void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) @@ -1877,8 +1831,9 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, } void * -arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache) +arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, + bool try_tcache_dalloc) { void *ret; size_t copysize; @@ -1897,9 +1852,9 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - ret = ipalloc(usize, alignment, zero); + ret = ipallocx(usize, alignment, zero, try_tcache_alloc, arena); } else - ret = arena_malloc(NULL, size + extra, zero, try_tcache); + ret = arena_malloc(arena, size + extra, zero, try_tcache_alloc); if (ret == NULL) { if (extra == 0) @@ -1909,9 +1864,10 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t usize = sa2u(size, alignment); if (usize == 0) return (NULL); - ret = ipalloc(usize, alignment, zero); + ret = ipallocx(usize, alignment, zero, try_tcache_alloc, + arena); } else - ret = arena_malloc(NULL, size, zero, try_tcache); + ret = arena_malloc(arena, size, zero, try_tcache_alloc); if (ret == NULL) return (NULL); @@ -1926,10 +1882,78 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, copysize = (size < oldsize) ? size : oldsize; VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); - iqalloc(ptr); + iqallocx(ptr, try_tcache_dalloc); return (ret); } +dss_prec_t +arena_dss_prec_get(arena_t *arena) +{ + dss_prec_t ret; + + malloc_mutex_lock(&arena->lock); + ret = arena->dss_prec; + malloc_mutex_unlock(&arena->lock); + return (ret); +} + +void +arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) +{ + + malloc_mutex_lock(&arena->lock); + arena->dss_prec = dss_prec; + malloc_mutex_unlock(&arena->lock); +} + +void +arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, + size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats) +{ + unsigned i; + + malloc_mutex_lock(&arena->lock); + *dss = dss_prec_names[arena->dss_prec]; + *nactive += arena->nactive; + *ndirty += arena->ndirty; + + astats->mapped += arena->stats.mapped; + astats->npurge += arena->stats.npurge; + astats->nmadvise += arena->stats.nmadvise; + astats->purged += arena->stats.purged; + astats->allocated_large += arena->stats.allocated_large; + astats->nmalloc_large += arena->stats.nmalloc_large; + astats->ndalloc_large += arena->stats.ndalloc_large; + astats->nrequests_large += arena->stats.nrequests_large; + + for (i = 0; i < nlclasses; i++) { + lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; + lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; + lstats[i].nrequests += arena->stats.lstats[i].nrequests; + lstats[i].curruns += arena->stats.lstats[i].curruns; + } + malloc_mutex_unlock(&arena->lock); + + for (i = 0; i < NBINS; i++) { + arena_bin_t *bin = &arena->bins[i]; + + malloc_mutex_lock(&bin->lock); + bstats[i].allocated += bin->stats.allocated; + bstats[i].nmalloc += bin->stats.nmalloc; + bstats[i].ndalloc += bin->stats.ndalloc; + bstats[i].nrequests += bin->stats.nrequests; + if (config_tcache) { + bstats[i].nfills += bin->stats.nfills; + bstats[i].nflushes += bin->stats.nflushes; + } + bstats[i].nruns += bin->stats.nruns; + bstats[i].reruns += bin->stats.reruns; + bstats[i].curruns += bin->stats.curruns; + malloc_mutex_unlock(&bin->lock); + } +} + bool arena_new(arena_t *arena, unsigned ind) { @@ -1958,6 +1982,8 @@ arena_new(arena_t *arena, unsigned ind) if (config_prof) arena->prof_accumbytes = 0; + arena->dss_prec = chunk_dss_prec_get(); + /* Initialize chunks. */ ql_new(&arena->chunks_dirty); arena->spare = NULL; @@ -32,7 +32,8 @@ base_pages_alloc(size_t minsize) assert(minsize != 0); csize = CHUNK_CEILING(minsize); zero = false; - base_pages = chunk_alloc(csize, chunksize, true, &zero); + base_pages = chunk_alloc(csize, chunksize, true, &zero, + chunk_dss_prec_get()); if (base_pages == NULL) return (true); base_next_addr = base_pages; diff --git a/src/chunk.c b/src/chunk.c index 1730452..5fc9e75 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -4,7 +4,8 @@ /******************************************************************************/ /* Data. */ -size_t opt_lg_chunk = LG_CHUNK_DEFAULT; +const char *opt_dss = DSS_DEFAULT; +size_t opt_lg_chunk = LG_CHUNK_DEFAULT; malloc_mutex_t chunks_mtx; chunk_stats_t stats_chunks; @@ -15,8 +16,10 @@ chunk_stats_t stats_chunks; * address space. Depending on function, different tree orderings are needed, * which is why there are two trees with the same contents. */ -static extent_tree_t chunks_szad; -static extent_tree_t chunks_ad; +static extent_tree_t chunks_szad_mmap; +static extent_tree_t chunks_ad_mmap; +static extent_tree_t chunks_szad_dss; +static extent_tree_t chunks_ad_dss; rtree_t *chunks_rtree; @@ -30,14 +33,17 @@ size_t arena_maxclass; /* Max size class for arenas. */ /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static void *chunk_recycle(size_t size, size_t alignment, bool base, +static void *chunk_recycle(extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, size_t size, size_t alignment, bool base, bool *zero); -static void chunk_record(void *chunk, size_t size); +static void chunk_record(extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, void *chunk, size_t size); /******************************************************************************/ static void * -chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) +chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, + size_t alignment, bool base, bool *zero) { void *ret; extent_node_t *node; @@ -62,7 +68,7 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) key.addr = NULL; key.size = alloc_size; malloc_mutex_lock(&chunks_mtx); - node = extent_tree_szad_nsearch(&chunks_szad, &key); + node = extent_tree_szad_nsearch(chunks_szad, &key); if (node == NULL) { malloc_mutex_unlock(&chunks_mtx); return (NULL); @@ -73,13 +79,13 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) trailsize = node->size - leadsize - size; ret = (void *)((uintptr_t)node->addr + leadsize); /* Remove node from the tree. */ - extent_tree_szad_remove(&chunks_szad, node); - extent_tree_ad_remove(&chunks_ad, node); + extent_tree_szad_remove(chunks_szad, node); + extent_tree_ad_remove(chunks_ad, node); if (leadsize != 0) { /* Insert the leading space as a smaller chunk. */ node->size = leadsize; - extent_tree_szad_insert(&chunks_szad, node); - extent_tree_ad_insert(&chunks_ad, node); + extent_tree_szad_insert(chunks_szad, node); + extent_tree_ad_insert(chunks_ad, node); node = NULL; } if (trailsize != 0) { @@ -102,8 +108,8 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) } node->addr = (void *)((uintptr_t)(ret) + size); node->size = trailsize; - extent_tree_szad_insert(&chunks_szad, node); - extent_tree_ad_insert(&chunks_ad, node); + extent_tree_szad_insert(chunks_szad, node); + extent_tree_ad_insert(chunks_ad, node); node = NULL; } malloc_mutex_unlock(&chunks_mtx); @@ -130,7 +136,8 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) * advantage of them if they are returned. */ void * -chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) +chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, + dss_prec_t dss_prec) { void *ret; @@ -139,19 +146,40 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = chunk_recycle(size, alignment, base, zero); - if (ret != NULL) - goto label_return; + /* + * Try to recycle an existing mapping. + */ - ret = chunk_alloc_mmap(size, alignment, zero); - if (ret != NULL) + /* "primary" dss. */ + if (config_dss && dss_prec == dss_prec_primary && (ret = + chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, alignment, + base, zero)) != NULL) + goto label_return; + /* mmap. */ + if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size, + alignment, base, zero)) != NULL) + goto label_return; + /* "secondary" dss. */ + if (config_dss && dss_prec == dss_prec_secondary && (ret = + chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, alignment, + base, zero)) != NULL) goto label_return; - if (config_dss) { - ret = chunk_alloc_dss(size, alignment, zero); - if (ret != NULL) - goto label_return; - } + /* + * Try to allocate a new mapping. + */ + + /* "primary" dss. */ + if (config_dss && dss_prec == dss_prec_primary && (ret = + chunk_alloc_dss(size, alignment, zero)) != NULL) + goto label_return; + /* mmap. */ + if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL) + goto label_return; + /* "secondary" dss. */ + if (config_dss && dss_prec == dss_prec_secondary && (ret = + chunk_alloc_dss(size, alignment, zero)) != NULL) + goto label_return; /* All strategies for allocation failed. */ ret = NULL; @@ -191,7 +219,8 @@ label_return: } static void -chunk_record(void *chunk, size_t size) +chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, + size_t size) { bool unzeroed; extent_node_t *xnode, *node, *prev, key; @@ -208,7 +237,7 @@ chunk_record(void *chunk, size_t size) malloc_mutex_lock(&chunks_mtx); key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(&chunks_ad, &key); + node = extent_tree_ad_nsearch(chunks_ad, &key); /* Try to coalesce forward. */ if (node != NULL && node->addr == key.addr) { /* @@ -216,11 +245,11 @@ chunk_record(void *chunk, size_t size) * not change the position within chunks_ad, so only * remove/insert from/into chunks_szad. */ - extent_tree_szad_remove(&chunks_szad, node); + extent_tree_szad_remove(chunks_szad, node); node->addr = chunk; node->size += size; node->zeroed = (node->zeroed && (unzeroed == false)); - extent_tree_szad_insert(&chunks_szad, node); + extent_tree_szad_insert(chunks_szad, node); if (xnode != NULL) base_node_dealloc(xnode); } else { @@ -239,12 +268,12 @@ chunk_record(void *chunk, size_t size) node->addr = chunk; node->size = size; node->zeroed = (unzeroed == false); - extent_tree_ad_insert(&chunks_ad, node); - extent_tree_szad_insert(&chunks_szad, node); + extent_tree_ad_insert(chunks_ad, node); + extent_tree_szad_insert(chunks_szad, node); } /* Try to coalesce backward. */ - prev = extent_tree_ad_prev(&chunks_ad, node); + prev = extent_tree_ad_prev(chunks_ad, node); if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == chunk) { /* @@ -252,14 +281,14 @@ chunk_record(void *chunk, size_t size) * not change the position within chunks_ad, so only * remove/insert node from/into chunks_szad. */ - extent_tree_szad_remove(&chunks_szad, prev); - extent_tree_ad_remove(&chunks_ad, prev); + extent_tree_szad_remove(chunks_szad, prev); + extent_tree_ad_remove(chunks_ad, prev); - extent_tree_szad_remove(&chunks_szad, node); + extent_tree_szad_remove(chunks_szad, node); node->addr = prev->addr; node->size += prev->size; node->zeroed = (node->zeroed && prev->zeroed); - extent_tree_szad_insert(&chunks_szad, node); + extent_tree_szad_insert(chunks_szad, node); base_node_dealloc(prev); } @@ -267,6 +296,20 @@ chunk_record(void *chunk, size_t size) } void +chunk_unmap(void *chunk, size_t size) +{ + assert(chunk != NULL); + assert(CHUNK_ADDR2BASE(chunk) == chunk); + assert(size != 0); + assert((size & chunksize_mask) == 0); + + if (config_dss && chunk_in_dss(chunk)) + chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size); + else if (chunk_dealloc_mmap(chunk, size)) + chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size); +} + +void chunk_dealloc(void *chunk, size_t size, bool unmap) { @@ -279,15 +322,13 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); if (config_stats || config_prof) { malloc_mutex_lock(&chunks_mtx); + assert(stats_chunks.curchunks >= (size / chunksize)); stats_chunks.curchunks -= (size / chunksize); malloc_mutex_unlock(&chunks_mtx); } - if (unmap) { - if ((config_dss && chunk_in_dss(chunk)) || - chunk_dealloc_mmap(chunk, size)) - chunk_record(chunk, size); - } + if (unmap) + chunk_unmap(chunk, size); } bool @@ -307,8 +348,10 @@ chunk_boot(void) } if (config_dss && chunk_dss_boot()) return (true); - extent_tree_szad_new(&chunks_szad); - extent_tree_ad_new(&chunks_ad); + extent_tree_szad_new(&chunks_szad_mmap); + extent_tree_ad_new(&chunks_ad_mmap); + extent_tree_szad_new(&chunks_szad_dss); + extent_tree_ad_new(&chunks_ad_dss); if (config_ivsalloc) { chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 2d68e48..24781cc 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -3,6 +3,16 @@ /******************************************************************************/ /* Data. */ +const char *dss_prec_names[] = { + "disabled", + "primary", + "secondary", + "N/A" +}; + +/* Current dss precedence default, used when creating new arenas. */ +static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT; + /* * Protects sbrk() calls. This avoids malloc races among threads, though it * does not protect against races with threads that call sbrk() directly. @@ -29,6 +39,31 @@ sbrk(intptr_t increment) } #endif +dss_prec_t +chunk_dss_prec_get(void) +{ + dss_prec_t ret; + + if (config_dss == false) + return (dss_prec_disabled); + malloc_mutex_lock(&dss_mtx); + ret = dss_prec_default; + malloc_mutex_unlock(&dss_mtx); + return (ret); +} + +bool +chunk_dss_prec_set(dss_prec_t dss_prec) +{ + + if (config_dss == false) + return (true); + malloc_mutex_lock(&dss_mtx); + dss_prec_default = dss_prec; + malloc_mutex_unlock(&dss_mtx); + return (false); +} + void * chunk_alloc_dss(size_t size, size_t alignment, bool *zero) { @@ -88,7 +123,7 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) dss_max = dss_next; malloc_mutex_unlock(&dss_mtx); if (cpad_size != 0) - chunk_dealloc(cpad, cpad_size, true); + chunk_unmap(cpad, cpad_size); if (*zero) { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); @@ -48,8 +48,8 @@ static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ size_t *oldlenp, void *newp, size_t newlen); #define INDEX_PROTO(n) \ -const ctl_named_node_t *n##_index(const size_t *mib, size_t miblen, \ - size_t i); +static const ctl_named_node_t *n##_index(const size_t *mib, \ + size_t miblen, size_t i); static bool ctl_arena_init(ctl_arena_stats_t *astats); static void ctl_arena_clear(ctl_arena_stats_t *astats); @@ -58,6 +58,7 @@ static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats); static void ctl_arena_refresh(arena_t *arena, unsigned i); +static bool ctl_grow(void); static void ctl_refresh(void); static bool ctl_init(void); static int ctl_lookup(const char *name, ctl_node_t const **nodesp, @@ -88,6 +89,7 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_valgrind) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) +CTL_PROTO(opt_dss) CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) CTL_PROTO(opt_lg_dirty_mult) @@ -110,6 +112,10 @@ CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) +CTL_PROTO(arena_i_purge) +static int arena_purge(unsigned arena_ind); +CTL_PROTO(arena_i_dss) +INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) CTL_PROTO(arenas_bin_i_run_size) @@ -125,6 +131,7 @@ CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlruns) CTL_PROTO(arenas_purge) +CTL_PROTO(arenas_extend) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) CTL_PROTO(prof_interval) @@ -158,6 +165,7 @@ CTL_PROTO(stats_arenas_i_lruns_j_nrequests) CTL_PROTO(stats_arenas_i_lruns_j_curruns) INDEX_PROTO(stats_arenas_i_lruns_j) CTL_PROTO(stats_arenas_i_nthreads) +CTL_PROTO(stats_arenas_i_dss) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_mapped) @@ -223,6 +231,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, + {NAME("dss"), CTL(opt_dss)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, @@ -247,6 +256,18 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_accum"), CTL(opt_prof_accum)} }; +static const ctl_named_node_t arena_i_node[] = { + {NAME("purge"), CTL(arena_i_purge)}, + {NAME("dss"), CTL(arena_i_dss)} +}; +static const ctl_named_node_t super_arena_i_node[] = { + {NAME(""), CHILD(named, arena_i)} +}; + +static const ctl_indexed_node_t arena_node[] = { + {INDEX(arena_i)} +}; + static const ctl_named_node_t arenas_bin_i_node[] = { {NAME("size"), CTL(arenas_bin_i_size)}, {NAME("nregs"), CTL(arenas_bin_i_nregs)}, @@ -282,7 +303,8 @@ static const ctl_named_node_t arenas_node[] = { {NAME("bin"), CHILD(indexed, arenas_bin)}, {NAME("nlruns"), CTL(arenas_nlruns)}, {NAME("lrun"), CHILD(indexed, arenas_lrun)}, - {NAME("purge"), CTL(arenas_purge)} + {NAME("purge"), CTL(arenas_purge)}, + {NAME("extend"), CTL(arenas_extend)} }; static const ctl_named_node_t prof_node[] = { @@ -352,6 +374,7 @@ static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = { static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, + {NAME("dss"), CTL(stats_arenas_i_dss)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, @@ -387,6 +410,7 @@ static const ctl_named_node_t root_node[] = { {NAME("thread"), CHILD(named, thread)}, {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)}, + {NAME("arena"), CHILD(indexed, arena)}, {NAME("arenas"), CHILD(named, arenas)}, {NAME("prof"), CHILD(named, prof)}, {NAME("stats"), CHILD(named, stats)} @@ -420,6 +444,7 @@ static void ctl_arena_clear(ctl_arena_stats_t *astats) { + astats->dss = dss_prec_names[dss_prec_limit]; astats->pactive = 0; astats->pdirty = 0; if (config_stats) { @@ -439,8 +464,8 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) { unsigned i; - arena_stats_merge(arena, &cstats->pactive, &cstats->pdirty, - &cstats->astats, cstats->bstats, cstats->lstats); + arena_stats_merge(arena, &cstats->dss, &cstats->pactive, + &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats); for (i = 0; i < NBINS; i++) { cstats->allocated_small += cstats->bstats[i].allocated; @@ -500,7 +525,7 @@ static void ctl_arena_refresh(arena_t *arena, unsigned i) { ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; - ctl_arena_stats_t *sstats = &ctl_stats.arenas[narenas]; + ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; ctl_arena_clear(astats); @@ -518,11 +543,72 @@ ctl_arena_refresh(arena_t *arena, unsigned i) } } +static bool +ctl_grow(void) +{ + size_t astats_size; + ctl_arena_stats_t *astats; + arena_t **tarenas; + + /* Extend arena stats and arenas arrays. */ + astats_size = (ctl_stats.narenas + 2) * sizeof(ctl_arena_stats_t); + if (ctl_stats.narenas == narenas_auto) { + /* ctl_stats.arenas and arenas came from base_alloc(). */ + astats = (ctl_arena_stats_t *)imalloc(astats_size); + if (astats == NULL) + return (true); + memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) * + sizeof(ctl_arena_stats_t)); + + tarenas = (arena_t **)imalloc((ctl_stats.narenas + 1) * + sizeof(arena_t *)); + if (tarenas == NULL) { + idalloc(astats); + return (true); + } + memcpy(tarenas, arenas, ctl_stats.narenas * sizeof(arena_t *)); + } else { + astats = (ctl_arena_stats_t *)iralloc(ctl_stats.arenas, + astats_size, 0, 0, false, false); + if (astats == NULL) + return (true); + + tarenas = (arena_t **)iralloc(arenas, (ctl_stats.narenas + 1) * + sizeof(arena_t *), 0, 0, false, false); + if (tarenas == NULL) + return (true); + } + /* Initialize the new astats and arenas elements. */ + memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); + if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) + return (true); + tarenas[ctl_stats.narenas] = NULL; + /* Swap merged stats to their new location. */ + { + ctl_arena_stats_t tstats; + memcpy(&tstats, &astats[ctl_stats.narenas], + sizeof(ctl_arena_stats_t)); + memcpy(&astats[ctl_stats.narenas], + &astats[ctl_stats.narenas + 1], sizeof(ctl_arena_stats_t)); + memcpy(&astats[ctl_stats.narenas + 1], &tstats, + sizeof(ctl_arena_stats_t)); + } + ctl_stats.arenas = astats; + ctl_stats.narenas++; + malloc_mutex_lock(&arenas_lock); + arenas = tarenas; + narenas_total++; + arenas_extend(narenas_total - 1); + malloc_mutex_unlock(&arenas_lock); + + return (false); +} + static void ctl_refresh(void) { unsigned i; - VARIABLE_ARRAY(arena_t *, tarenas, narenas); + VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); if (config_stats) { malloc_mutex_lock(&chunks_mtx); @@ -542,19 +628,19 @@ ctl_refresh(void) * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). */ - ctl_stats.arenas[narenas].nthreads = 0; - ctl_arena_clear(&ctl_stats.arenas[narenas]); + ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; + ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); - for (i = 0; i < narenas; i++) { + memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); + for (i = 0; i < ctl_stats.narenas; i++) { if (arenas[i] != NULL) ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; else ctl_stats.arenas[i].nthreads = 0; } malloc_mutex_unlock(&arenas_lock); - for (i = 0; i < narenas; i++) { + for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); ctl_stats.arenas[i].initialized = initialized; @@ -563,11 +649,13 @@ ctl_refresh(void) } if (config_stats) { - ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small - + ctl_stats.arenas[narenas].astats.allocated_large + ctl_stats.allocated = + ctl_stats.arenas[ctl_stats.narenas].allocated_small + + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large + + ctl_stats.huge.allocated; + ctl_stats.active = + (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE) + ctl_stats.huge.allocated; - ctl_stats.active = (ctl_stats.arenas[narenas].pactive << - LG_PAGE) + ctl_stats.huge.allocated; ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); } @@ -585,13 +673,15 @@ ctl_init(void) * Allocate space for one extra arena stats element, which * contains summed stats across all arenas. */ + assert(narenas_auto == narenas_total_get()); + ctl_stats.narenas = narenas_auto; ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc( - (narenas + 1) * sizeof(ctl_arena_stats_t)); + (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); if (ctl_stats.arenas == NULL) { ret = true; goto label_return; } - memset(ctl_stats.arenas, 0, (narenas + 1) * + memset(ctl_stats.arenas, 0, (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); /* @@ -601,14 +691,14 @@ ctl_init(void) */ if (config_stats) { unsigned i; - for (i = 0; i <= narenas; i++) { + for (i = 0; i <= ctl_stats.narenas; i++) { if (ctl_arena_init(&ctl_stats.arenas[i])) { ret = true; goto label_return; } } } - ctl_stats.arenas[narenas].initialized = true; + ctl_stats.arenas[ctl_stats.narenas].initialized = true; ctl_epoch = 0; ctl_refresh(); @@ -1084,13 +1174,14 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; unsigned newind, oldind; + malloc_mutex_lock(&ctl_mtx); newind = oldind = choose_arena(NULL)->ind; WRITE(newind, unsigned); READ(oldind, unsigned); if (newind != oldind) { arena_t *arena; - if (newind >= narenas) { + if (newind >= ctl_stats.narenas) { /* New arena index is out of range. */ ret = EFAULT; goto label_return; @@ -1123,6 +1214,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: + malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1156,6 +1248,7 @@ CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) /******************************************************************************/ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) +CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) @@ -1181,10 +1274,124 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) /******************************************************************************/ +static int +arena_purge(unsigned arena_ind) +{ + int ret; + + malloc_mutex_lock(&ctl_mtx); + { + VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); + + malloc_mutex_lock(&arenas_lock); + memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); + malloc_mutex_unlock(&arenas_lock); + + if (arena_ind == ctl_stats.narenas) { + unsigned i; + for (i = 0; i < ctl_stats.narenas; i++) { + if (tarenas[i] != NULL) + arena_purge_all(tarenas[i]); + } + } else { + assert(arena_ind < ctl_stats.narenas); + if (tarenas[arena_ind] != NULL) + arena_purge_all(tarenas[arena_ind]); + } + } + + ret = 0; + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + + READONLY(); + WRITEONLY(); + ret = arena_purge(mib[1]); + +label_return: + return (ret); +} + +static int +arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret, i; + bool match, err; + const char *dss; + unsigned arena_ind = mib[1]; + dss_prec_t dss_prec_old = dss_prec_limit; + dss_prec_t dss_prec = dss_prec_limit; + + WRITE(dss, const char *); + match = false; + for (i = 0; i < dss_prec_limit; i++) { + if (strcmp(dss_prec_names[i], dss) == 0) { + dss_prec = i; + match = true; + break; + } + } + if (match == false) { + ret = EINVAL; + goto label_return; + } + + if (arena_ind < ctl_stats.narenas) { + arena_t *arena = arenas[arena_ind]; + if (arena != NULL) { + dss_prec_old = arena_dss_prec_get(arena); + arena_dss_prec_set(arena, dss_prec); + err = false; + } else + err = true; + } else { + dss_prec_old = chunk_dss_prec_get(); + err = chunk_dss_prec_set(dss_prec); + } + dss = dss_prec_names[dss_prec_old]; + READ(dss, const char *); + if (err) { + ret = EFAULT; + goto label_return; + } + + ret = 0; +label_return: + return (ret); +} + +static const ctl_named_node_t * +arena_i_index(const size_t *mib, size_t miblen, size_t i) +{ + const ctl_named_node_t * ret; + + malloc_mutex_lock(&ctl_mtx); + if (i > ctl_stats.narenas) { + ret = NULL; + goto label_return; + } + + ret = super_arena_i_node; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + + +/******************************************************************************/ + CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) -const ctl_named_node_t * +static const ctl_named_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1194,7 +1401,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) -const ctl_named_node_t * +static const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1203,7 +1410,27 @@ arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) return (super_arenas_lrun_i_node); } -CTL_RO_NL_GEN(arenas_narenas, narenas, unsigned) +static int +arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned narenas; + + malloc_mutex_lock(&ctl_mtx); + READONLY(); + if (*oldlenp != sizeof(unsigned)) { + ret = EINVAL; + goto label_return; + } + narenas = ctl_stats.narenas; + READ(narenas, unsigned); + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} static int arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, @@ -1214,13 +1441,13 @@ arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, malloc_mutex_lock(&ctl_mtx); READONLY(); - if (*oldlenp != narenas * sizeof(bool)) { + if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { ret = EINVAL; - nread = (*oldlenp < narenas * sizeof(bool)) - ? (*oldlenp / sizeof(bool)) : narenas; + nread = (*oldlenp < ctl_stats.narenas * sizeof(bool)) + ? (*oldlenp / sizeof(bool)) : ctl_stats.narenas; } else { ret = 0; - nread = narenas; + nread = ctl_stats.narenas; } for (i = 0; i < nread; i++) @@ -1243,36 +1470,42 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena; + unsigned arena_ind; + malloc_mutex_lock(&ctl_mtx); WRITEONLY(); - arena = UINT_MAX; - WRITE(arena, unsigned); - if (newp != NULL && arena >= narenas) { + arena_ind = UINT_MAX; + WRITE(arena_ind, unsigned); + if (newp != NULL && arena_ind >= ctl_stats.narenas) ret = EFAULT; - goto label_return; - } else { - VARIABLE_ARRAY(arena_t *, tarenas, narenas); + else { + if (arena_ind == UINT_MAX) + arena_ind = ctl_stats.narenas; + ret = arena_purge(arena_ind); + } - malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); - malloc_mutex_unlock(&arenas_lock); +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} - if (arena == UINT_MAX) { - unsigned i; - for (i = 0; i < narenas; i++) { - if (tarenas[i] != NULL) - arena_purge_all(tarenas[i]); - } - } else { - assert(arena < narenas); - if (tarenas[arena] != NULL) - arena_purge_all(tarenas[arena]); - } +static int +arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + + malloc_mutex_lock(&ctl_mtx); + READONLY(); + if (ctl_grow()) { + ret = EAGAIN; + goto label_return; } + READ(ctl_stats.narenas - 1, unsigned); ret = 0; label_return: + malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1377,7 +1610,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) -const ctl_named_node_t * +static const ctl_named_node_t * stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) { @@ -1395,7 +1628,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) -const ctl_named_node_t * +static const ctl_named_node_t * stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) { @@ -1405,6 +1638,7 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) } CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) +CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, @@ -1416,13 +1650,13 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, CTL_RO_CGEN(config_stats, stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, uint64_t) -const ctl_named_node_t * +static const ctl_named_node_t * stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t * ret; malloc_mutex_lock(&ctl_mtx); - if (ctl_stats.arenas[i].initialized == false) { + if (i > ctl_stats.narenas || ctl_stats.arenas[i].initialized == false) { ret = NULL; goto label_return; } @@ -48,7 +48,8 @@ huge_palloc(size_t size, size_t alignment, bool zero) * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - ret = chunk_alloc(csize, alignment, false, &is_zeroed); + ret = chunk_alloc(csize, alignment, false, &is_zeroed, + chunk_dss_prec_get()); if (ret == NULL) { base_node_dealloc(node); return (NULL); @@ -101,7 +102,7 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) void * huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero) + size_t alignment, bool zero, bool try_tcache_dalloc) { void *ret; size_t copysize; @@ -180,7 +181,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, #endif { memcpy(ret, ptr, copysize); - iqalloc(ptr); + iqallocx(ptr, try_tcache_dalloc); } return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index b2daa30..8a667b6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -33,7 +33,8 @@ unsigned ncpus; malloc_mutex_t arenas_lock; arena_t **arenas; -unsigned narenas; +unsigned narenas_total; +unsigned narenas_auto; /* Set to true once the allocator has been initialized. */ static bool malloc_initialized = false; @@ -144,14 +145,14 @@ choose_arena_hard(void) { arena_t *ret; - if (narenas > 1) { + if (narenas_auto > 1) { unsigned i, choose, first_null; choose = 0; - first_null = narenas; + first_null = narenas_auto; malloc_mutex_lock(&arenas_lock); assert(arenas[0] != NULL); - for (i = 1; i < narenas; i++) { + for (i = 1; i < narenas_auto; i++) { if (arenas[i] != NULL) { /* * Choose the first arena that has the lowest @@ -160,7 +161,7 @@ choose_arena_hard(void) if (arenas[i]->nthreads < arenas[choose]->nthreads) choose = i; - } else if (first_null == narenas) { + } else if (first_null == narenas_auto) { /* * Record the index of the first uninitialized * arena, in case all extant arenas are in use. @@ -174,7 +175,8 @@ choose_arena_hard(void) } } - if (arenas[choose]->nthreads == 0 || first_null == narenas) { + if (arenas[choose]->nthreads == 0 + || first_null == narenas_auto) { /* * Use an unloaded arena, or the least loaded arena if * all arenas are already initialized. @@ -203,7 +205,7 @@ stats_print_atexit(void) { if (config_tcache && config_stats) { - unsigned i; + unsigned narenas, i; /* * Merge stats from extant threads. This is racy, since @@ -212,7 +214,7 @@ stats_print_atexit(void) * out of date by the time they are reported, if other threads * continue to allocate. */ - for (i = 0; i < narenas; i++) { + for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena = arenas[i]; if (arena != NULL) { tcache_t *tcache; @@ -554,6 +556,30 @@ malloc_conf_init(void) */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1) + if (strncmp("dss", k, klen) == 0) { + int i; + bool match = false; + for (i = 0; i < dss_prec_limit; i++) { + if (strncmp(dss_prec_names[i], v, vlen) + == 0) { + if (chunk_dss_prec_set(i)) { + malloc_conf_error( + "Error setting dss", + k, klen, v, vlen); + } else { + opt_dss = + dss_prec_names[i]; + match = true; + break; + } + } + } + if (match == false) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, SIZE_T_MAX) CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", @@ -699,9 +725,9 @@ malloc_init_hard(void) * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). */ - narenas = 1; + narenas_total = narenas_auto = 1; arenas = init_arenas; - memset(arenas, 0, sizeof(arena_t *) * narenas); + memset(arenas, 0, sizeof(arena_t *) * narenas_auto); /* * Initialize one arena here. The rest are lazily created in @@ -759,20 +785,21 @@ malloc_init_hard(void) else opt_narenas = 1; } - narenas = opt_narenas; + narenas_auto = opt_narenas; /* * Make sure that the arenas array can be allocated. In practice, this * limit is enough to allow the allocator to function, but the ctl * machinery will fail to allocate memory at far lower limits. */ - if (narenas > chunksize / sizeof(arena_t *)) { - narenas = chunksize / sizeof(arena_t *); + if (narenas_auto > chunksize / sizeof(arena_t *)) { + narenas_auto = chunksize / sizeof(arena_t *); malloc_printf("<jemalloc>: Reducing narenas to limit (%d)\n", - narenas); + narenas_auto); } + narenas_total = narenas_auto; /* Allocate and initialize arenas. */ - arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); + arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total); if (arenas == NULL) { malloc_mutex_unlock(&init_lock); return (true); @@ -781,7 +808,7 @@ malloc_init_hard(void) * Zero the array. In practice, this should always be pre-zeroed, * since it was just mmap()ed, but let's be sure. */ - memset(arenas, 0, sizeof(arena_t *) * narenas); + memset(arenas, 0, sizeof(arena_t *) * narenas_total); /* Copy the pointer to the one arena that was already initialized. */ arenas[0] = init_arenas[0]; @@ -1346,18 +1373,19 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, #ifdef JEMALLOC_EXPERIMENTAL JEMALLOC_INLINE void * -iallocm(size_t usize, size_t alignment, bool zero) +iallocm(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena) { assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment))); if (alignment != 0) - return (ipalloc(usize, alignment, zero)); + return (ipallocx(usize, alignment, zero, try_tcache, arena)); else if (zero) - return (icalloc(usize)); + return (icallocx(usize, try_tcache, arena)); else - return (imalloc(usize)); + return (imallocx(usize, try_tcache, arena)); } int @@ -1368,6 +1396,9 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + arena_t *arena; + bool try_tcache; assert(ptr != NULL); assert(size != 0); @@ -1375,6 +1406,14 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) if (malloc_init()) goto label_oom; + if (arena_ind != UINT_MAX) { + arena = arenas[arena_ind]; + try_tcache = false; + } else { + arena = NULL; + try_tcache = true; + } + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); if (usize == 0) goto label_oom; @@ -1391,18 +1430,19 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment); assert(usize_promoted != 0); - p = iallocm(usize_promoted, alignment, zero); + p = iallocm(usize_promoted, alignment, zero, + try_tcache, arena); if (p == NULL) goto label_oom; arena_prof_promoted(p, usize); } else { - p = iallocm(usize, alignment, zero); + p = iallocm(usize, alignment, zero, try_tcache, arena); if (p == NULL) goto label_oom; } prof_malloc(p, usize, cnt); } else { - p = iallocm(usize, alignment, zero); + p = iallocm(usize, alignment, zero, try_tcache, arena); if (p == NULL) goto label_oom; } @@ -1439,6 +1479,9 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; bool no_move = flags & ALLOCM_NO_MOVE; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + bool try_tcache_alloc, try_tcache_dalloc; + arena_t *arena; assert(ptr != NULL); assert(*ptr != NULL); @@ -1446,6 +1489,19 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) assert(SIZE_T_MAX - size >= extra); assert(malloc_initialized || IS_INITIALIZER); + if (arena_ind != UINT_MAX) { + arena_chunk_t *chunk; + try_tcache_alloc = true; + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(*ptr); + try_tcache_dalloc = (chunk == *ptr || chunk->arena != + arenas[arena_ind]); + arena = arenas[arena_ind]; + } else { + try_tcache_alloc = true; + try_tcache_dalloc = true; + arena = NULL; + } + p = *ptr; if (config_prof && opt_prof) { prof_thr_cnt_t *cnt; @@ -1472,9 +1528,10 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && ((alignment == 0) ? s2u(size) : sa2u(size, alignment)) <= SMALL_MAXCLASS) { - q = iralloc(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + q = irallocx(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1), - alignment, zero, no_move); + alignment, zero, no_move, try_tcache_alloc, + try_tcache_dalloc, arena); if (q == NULL) goto label_err; if (max_usize < PAGE) { @@ -1483,7 +1540,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) } else usize = isalloc(q, config_prof); } else { - q = iralloc(p, size, extra, alignment, zero, no_move); + q = irallocx(p, size, extra, alignment, zero, no_move, + try_tcache_alloc, try_tcache_dalloc, arena); if (q == NULL) goto label_err; usize = isalloc(q, config_prof); @@ -1500,7 +1558,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) old_size = isalloc(p, false); old_rzsize = u2rz(old_size); } - q = iralloc(p, size, extra, alignment, zero, no_move); + q = irallocx(p, size, extra, alignment, zero, no_move, + try_tcache_alloc, try_tcache_dalloc, arena); if (q == NULL) goto label_err; if (config_stats) @@ -1561,10 +1620,19 @@ je_dallocm(void *ptr, int flags) { size_t usize; size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + bool try_tcache; assert(ptr != NULL); assert(malloc_initialized || IS_INITIALIZER); + if (arena_ind != UINT_MAX) { + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + try_tcache = (chunk == ptr || chunk->arena != + arenas[arena_ind]); + } else + try_tcache = true; + UTRACE(ptr, 0, 0); if (config_stats || config_valgrind) usize = isalloc(ptr, config_prof); @@ -1577,7 +1645,7 @@ je_dallocm(void *ptr, int flags) thread_allocated_tsd_get()->deallocated += usize; if (config_valgrind && opt_valgrind) rzsize = p2rz(ptr); - iqalloc(ptr); + iqallocx(ptr, try_tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); return (ALLOCM_SUCCESS); @@ -1654,7 +1722,7 @@ _malloc_prefork(void) /* Acquire all mutexes in a safe order. */ ctl_prefork(); malloc_mutex_prefork(&arenas_lock); - for (i = 0; i < narenas; i++) { + for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_prefork(arenas[i]); } @@ -1685,7 +1753,7 @@ _malloc_postfork(void) base_postfork_parent(); chunk_postfork_parent(); prof_postfork_parent(); - for (i = 0; i < narenas; i++) { + for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_postfork_parent(arenas[i]); } @@ -1705,7 +1773,7 @@ jemalloc_postfork_child(void) base_postfork_child(); chunk_postfork_child(); prof_postfork_child(); - for (i = 0; i < narenas; i++) { + for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_postfork_child(arenas[i]); } diff --git a/src/stats.c b/src/stats.c index 433b80d..43f87af 100644 --- a/src/stats.c +++ b/src/stats.c @@ -206,6 +206,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i, bool bins, bool large) { unsigned nthreads; + const char *dss; size_t page, pactive, pdirty, mapped; uint64_t npurge, nmadvise, purged; size_t small_allocated; @@ -218,6 +219,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); malloc_cprintf(write_cb, cbopaque, "assigned threads: %u\n", nthreads); + CTL_I_GET("stats.arenas.0.dss", &dss, const char *); + malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", + dss); CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); @@ -370,6 +374,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, "Run-time option settings:\n"); OPT_WRITE_BOOL(abort) OPT_WRITE_SIZE_T(lg_chunk) + OPT_WRITE_CHAR_P(dss) OPT_WRITE_SIZE_T(narenas) OPT_WRITE_SSIZE_T(lg_dirty_mult) OPT_WRITE_BOOL(stats_print) @@ -400,7 +405,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus); CTL_GET("arenas.narenas", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, "Max arenas: %u\n", uv); + malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", sizeof(void *)); @@ -472,7 +477,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("stats.chunks.current", &chunks_current, size_t); malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " "highchunks curchunks\n"); - malloc_cprintf(write_cb, cbopaque, " %13"PRIu64"%13zu%13zu\n", + malloc_cprintf(write_cb, cbopaque, + " %13"PRIu64" %12zu %12zu\n", chunks_total, chunks_high, chunks_current); /* Print huge stats. */ diff --git a/src/tcache.c b/src/tcache.c index 60244c4..47e14f3 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -288,7 +288,7 @@ tcache_create(arena_t *arena) else if (size <= tcache_maxclass) tcache = (tcache_t *)arena_malloc_large(arena, size, true); else - tcache = (tcache_t *)icalloc(size); + tcache = (tcache_t *)icallocx(size, false, arena); if (tcache == NULL) return (NULL); @@ -364,7 +364,7 @@ tcache_destroy(tcache_t *tcache) arena_dalloc_large(arena, chunk, tcache); } else - idalloc(tcache); + idallocx(tcache, false); } void |