| field | value |
|---|---|
| author | Jason Evans <jasone@canonware.com>, 2010-10-24 23:51:13 (GMT) |
| committer | Jason Evans <jasone@canonware.com>, 2010-10-24 23:51:13 (GMT) |
| commit | a39d5b6ef2b2f4d4dc6be397ed1465fcbe5ce38f |
| tree | 635570f7283bfc4c8d504d9881e31d0de51cca31 /jemalloc/src |
| parent | e139ab8b4f69e05b809528a9d98e171e5e89ce0c |
| parent | 3af83344a54f6c6051e532188586d1a07474c068 |
| download | jemalloc-2.0.0.zip, jemalloc-2.0.0.tar.gz, jemalloc-2.0.0.tar.bz2 |
Merge branch 'dev' (tag: 2.0.0)
Diffstat (limited to 'jemalloc/src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | jemalloc/src/arena.c | 1022 |
| -rw-r--r-- | jemalloc/src/base.c | 2 |
| -rw-r--r-- | jemalloc/src/chunk.c | 39 |
| -rw-r--r-- | jemalloc/src/chunk_mmap.c | 57 |
| -rw-r--r-- | jemalloc/src/chunk_swap.c | 7 |
| -rw-r--r-- | jemalloc/src/ckh.c | 28 |
| -rw-r--r-- | jemalloc/src/ctl.c | 175 |
| -rw-r--r-- | jemalloc/src/huge.c | 85 |
| -rw-r--r-- | jemalloc/src/jemalloc.c | 1390 |
| -rw-r--r-- | jemalloc/src/mutex.c | 14 |
| -rw-r--r-- | jemalloc/src/prof.c | 795 |
| -rw-r--r-- | jemalloc/src/rtree.c | 43 |
| -rw-r--r-- | jemalloc/src/stats.c | 204 |
| -rw-r--r-- | jemalloc/src/tcache.c | 56 |
| -rw-r--r-- | jemalloc/src/zone.c | 354 |
15 files changed, 2657 insertions(+), 1614 deletions(-)
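Much of the arena.c churn below replaces the old `arena_chunk_header_npages` offset with a `map_bias`, so that `chunk->map[]` no longer carries entries for the chunk's own header pages; hence the pervasive `chunk->map[pageind-map_bias]` rewrites and the new iterative computation in the `arena_boot()` hunk. The sketch below restates that computation outside the diff, as a minimal standalone program; `PAGE_SHIFT`, `CHUNK_NPAGES`, and the slimmed-down `arena_chunk_t` layout are placeholder assumptions for illustration, not jemalloc's real definitions.

```c
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* Placeholder configuration; jemalloc derives these at boot time. */
#define PAGE_SHIFT	12
#define PAGE_SIZE	((size_t)1 << PAGE_SHIFT)
#define PAGE_MASK	(PAGE_SIZE - 1)
#define CHUNK_NPAGES	((size_t)1024)	/* e.g. 4 MiB chunks of 4 KiB pages. */

typedef struct {
	size_t bits;			/* CHUNK_MAP_* flags and run size. */
} arena_chunk_map_t;

/* Slimmed-down stand-in for arena_chunk_t; only the layout matters here. */
typedef struct {
	void *arena;
	size_t ndirty;
	arena_chunk_map_t map[1];	/* map[0] describes page `map_bias`. */
} arena_chunk_t;

/*
 * Iterate as the arena_boot() hunk does: the header contains the page map,
 * and the page map omits entries for the header pages, so the two sizes are
 * mutually dependent.  Pass 1 over-counts, pass 2 can be one page short,
 * pass 3 is stable.
 */
static size_t
compute_map_bias(size_t chunk_npages)
{
	size_t map_bias = 0;

	for (unsigned i = 0; i < 3; i++) {
		size_t header_size = offsetof(arena_chunk_t, map) +
		    sizeof(arena_chunk_map_t) * (chunk_npages - map_bias);

		map_bias = (header_size >> PAGE_SHIFT) +
		    ((header_size & PAGE_MASK) != 0);
	}
	return (map_bias);
}

int
main(void)
{
	size_t map_bias = compute_map_bias(CHUNK_NPAGES);

	assert(map_bias > 0);
	/* Page `pageind` of a chunk maps to chunk->map[pageind - map_bias]. */
	printf("map_bias = %zu header pages per chunk\n", map_bias);
	return (0);
}
```

With the placeholder sizes above the loop settles after the second pass; the real value depends on the actual `arena_chunk_t` layout and page size, which is why the boot code computes it rather than hard-coding `arena_chunk_header_npages` as before.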
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c index ee859fc..3d4f888 100644 --- a/jemalloc/src/arena.c +++ b/jemalloc/src/arena.c @@ -165,7 +165,7 @@ static arena_chunk_t *arena_chunk_alloc(arena_t *arena); static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero); -static void arena_purge(arena_t *arena); +static void arena_purge(arena_t *arena, bool all); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty); static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize); @@ -174,16 +174,18 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size); +static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin); static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); +static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, arena_bin_t *bin); static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t size, size_t oldsize); + void *ptr, size_t oldsize, size_t size); static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t size, size_t oldsize); -static bool arena_ralloc_large(void *ptr, size_t size, size_t oldsize); -#ifdef JEMALLOC_TINY -static size_t pow2_ceil(size_t x); -#endif + void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); +static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero); static bool small_size2bin_init(void); #ifdef JEMALLOC_DEBUG static void small_size2bin_validate(void); @@ -281,12 +283,33 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) assert(((uintptr_t)ptr - ((uintptr_t)run + (uintptr_t)run->bin->reg0_offset)) % (uintptr_t)run->bin->reg_size == 0); + /* + * Freeing a pointer lower than region zero can cause assertion + * failure. + */ + assert((uintptr_t)ptr >= (uintptr_t)run + + (uintptr_t)run->bin->reg0_offset); + /* + * Freeing a pointer past in the run's frontier can cause assertion + * failure. + */ + assert((uintptr_t)ptr < (uintptr_t)run->next); *(void **)ptr = run->avail; run->avail = ptr; run->nfree++; } +#ifdef JEMALLOC_DEBUG +static inline void +arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) +{ + size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT)); + for (size_t i = 0; i < PAGE_SIZE / sizeof(size_t); i++) + assert(p[i] == 0); +} +#endif + static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, bool zero) @@ -300,39 +323,40 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, old_ndirty = chunk->ndirty; run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); - flag_dirty = chunk->map[run_ind].bits & CHUNK_MAP_DIRTY; + flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY; runs_avail = (flag_dirty != 0) ? 
&arena->runs_avail_dirty : &arena->runs_avail_clean; - total_pages = (chunk->map[run_ind].bits & ~PAGE_MASK) >> + total_pages = (chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) >> PAGE_SHIFT; - assert((chunk->map[run_ind+total_pages-1].bits & CHUNK_MAP_DIRTY) == - flag_dirty); + assert((chunk->map[run_ind+total_pages-1-map_bias].bits & + CHUNK_MAP_DIRTY) == flag_dirty); need_pages = (size >> PAGE_SHIFT); assert(need_pages > 0); assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; - arena_avail_tree_remove(runs_avail, &chunk->map[run_ind]); + arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); arena->nactive += need_pages; /* Keep track of trailing unused pages for later use. */ if (rem_pages > 0) { if (flag_dirty != 0) { - chunk->map[run_ind+need_pages].bits = (rem_pages << - PAGE_SHIFT) | CHUNK_MAP_DIRTY; - chunk->map[run_ind+total_pages-1].bits = (rem_pages << - PAGE_SHIFT) | CHUNK_MAP_DIRTY; + chunk->map[run_ind+need_pages-map_bias].bits = + (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY; + chunk->map[run_ind+total_pages-1-map_bias].bits = + (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY; } else { - chunk->map[run_ind+need_pages].bits = (rem_pages << - PAGE_SHIFT) | (chunk->map[run_ind+need_pages].bits & - CHUNK_MAP_ZEROED); - chunk->map[run_ind+total_pages-1].bits = (rem_pages << - PAGE_SHIFT) | - (chunk->map[run_ind+total_pages-1].bits & - CHUNK_MAP_ZEROED); + chunk->map[run_ind+need_pages-map_bias].bits = + (rem_pages << PAGE_SHIFT) | + (chunk->map[run_ind+need_pages-map_bias].bits & + CHUNK_MAP_UNZEROED); + chunk->map[run_ind+total_pages-1-map_bias].bits = + (rem_pages << PAGE_SHIFT) | + (chunk->map[run_ind+total_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED); } arena_avail_tree_insert(runs_avail, - &chunk->map[run_ind+need_pages]); + &chunk->map[run_ind+need_pages-map_bias]); } /* Update dirty page accounting. */ @@ -353,13 +377,19 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * zeroed (i.e. never before touched). */ for (i = 0; i < need_pages; i++) { - if ((chunk->map[run_ind + i].bits & - CHUNK_MAP_ZEROED) == 0) { + if ((chunk->map[run_ind+i-map_bias].bits + & CHUNK_MAP_UNZEROED) != 0) { memset((void *)((uintptr_t) - chunk + ((run_ind + i) << + chunk + ((run_ind+i) << PAGE_SHIFT)), 0, PAGE_SIZE); } +#ifdef JEMALLOC_DEBUG + else { + arena_chunk_validate_zeroed( + chunk, run_ind+i); + } +#endif } } else { /* @@ -376,27 +406,54 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * Set the last element first, in case the run only contains one * page (i.e. both statements set the same element). */ - chunk->map[run_ind+need_pages-1].bits = CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED | flag_dirty; - chunk->map[run_ind].bits = size | CHUNK_MAP_LARGE | -#ifdef JEMALLOC_PROF - CHUNK_MAP_CLASS_MASK | -#endif - CHUNK_MAP_ALLOCATED | flag_dirty; + chunk->map[run_ind+need_pages-1-map_bias].bits = + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | flag_dirty; + chunk->map[run_ind-map_bias].bits = size | flag_dirty | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; } else { assert(zero == false); /* - * Propagate the dirty flag to the allocated small run, so that - * arena_dalloc_bin_run() has the ability to conditionally trim - * clean pages. + * Propagate the dirty and unzeroed flags to the allocated + * small run, so that arena_dalloc_bin_run() has the ability to + * conditionally trim clean pages. 
+ */ + chunk->map[run_ind-map_bias].bits = + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) | + CHUNK_MAP_ALLOCATED | flag_dirty; +#ifdef JEMALLOC_DEBUG + /* + * The first page will always be dirtied during small run + * initialization, so a validation failure here would not + * actually cause an observable failure. */ - chunk->map[run_ind].bits = CHUNK_MAP_ALLOCATED | flag_dirty; + if (flag_dirty == 0 && + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) + == 0) + arena_chunk_validate_zeroed(chunk, run_ind); +#endif for (i = 1; i < need_pages - 1; i++) { - chunk->map[run_ind + i].bits = (i << PAGE_SHIFT) - | CHUNK_MAP_ALLOCATED; + chunk->map[run_ind+i-map_bias].bits = (i << PAGE_SHIFT) + | (chunk->map[run_ind+i-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED; +#ifdef JEMALLOC_DEBUG + if (flag_dirty == 0 && + (chunk->map[run_ind+i-map_bias].bits & + CHUNK_MAP_UNZEROED) == 0) + arena_chunk_validate_zeroed(chunk, run_ind+i); +#endif + } + chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages + - 1) << PAGE_SHIFT) | + (chunk->map[run_ind+need_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; +#ifdef JEMALLOC_DEBUG + if (flag_dirty == 0 && + (chunk->map[run_ind+need_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) == 0) { + arena_chunk_validate_zeroed(chunk, + run_ind+need_pages-1); } - chunk->map[run_ind + need_pages - 1].bits = ((need_pages - 1) << - PAGE_SHIFT) | CHUNK_MAP_ALLOCATED | flag_dirty; +#endif } } @@ -413,20 +470,24 @@ arena_chunk_alloc(arena_t *arena) arena->spare = NULL; /* Insert the run into the appropriate runs_avail_* tree. */ - if ((chunk->map[arena_chunk_header_npages].bits & - CHUNK_MAP_DIRTY) == 0) + if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) runs_avail = &arena->runs_avail_clean; else runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_insert(runs_avail, - &chunk->map[arena_chunk_header_npages]); + assert((chunk->map[0].bits & ~PAGE_MASK) == arena_maxclass); + assert((chunk->map[chunk_npages-1-map_bias].bits & ~PAGE_MASK) + == arena_maxclass); + assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) == + (chunk->map[chunk_npages-1-map_bias].bits & + CHUNK_MAP_DIRTY)); + arena_avail_tree_insert(runs_avail, &chunk->map[0]); } else { bool zero; - size_t zeroed; + size_t unzeroed; zero = false; malloc_mutex_unlock(&arena->lock); - chunk = (arena_chunk_t *)chunk_alloc(chunksize, &zero); + chunk = (arena_chunk_t *)chunk_alloc(chunksize, false, &zero); malloc_mutex_lock(&arena->lock); if (chunk == NULL) return (NULL); @@ -449,17 +510,28 @@ arena_chunk_alloc(arena_t *arena) * Mark the pages as zeroed iff chunk_alloc() returned a zeroed * chunk. */ - zeroed = zero ? CHUNK_MAP_ZEROED : 0; - for (i = 0; i < arena_chunk_header_npages; i++) - chunk->map[i].bits = 0; - chunk->map[i].bits = arena_maxclass | zeroed; - for (i++; i < chunk_npages-1; i++) - chunk->map[i].bits = zeroed; - chunk->map[chunk_npages-1].bits = arena_maxclass | zeroed; + unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; + chunk->map[0].bits = arena_maxclass | unzeroed; + /* + * There is no need to initialize the internal page map entries + * unless the chunk is not zeroed. 
+ */ + if (zero == false) { + for (i = map_bias+1; i < chunk_npages-1; i++) + chunk->map[i-map_bias].bits = unzeroed; + } +#ifdef JEMALLOC_DEBUG + else { + for (i = map_bias+1; i < chunk_npages-1; i++) + assert(chunk->map[i-map_bias].bits == unzeroed); + } +#endif + chunk->map[chunk_npages-1-map_bias].bits = arena_maxclass | + unzeroed; /* Insert the run into the runs_avail_clean tree. */ arena_avail_tree_insert(&arena->runs_avail_clean, - &chunk->map[arena_chunk_header_npages]); + &chunk->map[0]); } return (chunk); @@ -474,13 +546,11 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) * Remove run from the appropriate runs_avail_* tree, so that the arena * does not use it. */ - if ((chunk->map[arena_chunk_header_npages].bits & - CHUNK_MAP_DIRTY) == 0) + if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) runs_avail = &arena->runs_avail_clean; else runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_remove(runs_avail, - &chunk->map[arena_chunk_header_npages]); + arena_avail_tree_remove(runs_avail, &chunk->map[0]); if (arena->spare != NULL) { arena_chunk_t *spare = arena->spare; @@ -516,8 +586,9 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); if (mapelm != NULL) { arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map) - / sizeof(arena_chunk_map_t); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << PAGE_SHIFT)); @@ -527,8 +598,9 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); if (mapelm != NULL) { arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map) - / sizeof(arena_chunk_map_t); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << PAGE_SHIFT)); @@ -541,8 +613,8 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) */ chunk = arena_chunk_alloc(arena); if (chunk != NULL) { - run = (arena_run_t *)((uintptr_t)chunk + - (arena_chunk_header_npages << PAGE_SHIFT)); + run = (arena_run_t *)((uintptr_t)chunk + (map_bias << + PAGE_SHIFT)); arena_run_split(arena, run, size, large, zero); return (run); } @@ -555,8 +627,9 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); if (mapelm != NULL) { arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map) - / sizeof(arena_chunk_map_t); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << PAGE_SHIFT)); @@ -566,8 +639,9 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); if (mapelm != NULL) { arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map) - / sizeof(arena_chunk_map_t); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << PAGE_SHIFT)); @@ -587,7 +661,7 @@ 
arena_maybe_purge(arena_t *arena) (arena->ndirty - arena->npurgatory) > chunk_npages && (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - arena->npurgatory)) - arena_purge(arena); + arena_purge(arena, false); } static inline void @@ -595,7 +669,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) { ql_head(arena_chunk_map_t) mapelms; arena_chunk_map_t *mapelm; - size_t pageind, flag_zeroed; + size_t pageind, flag_unzeroed; #ifdef JEMALLOC_DEBUG size_t ndirty; #endif @@ -605,11 +679,19 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) ql_new(&mapelms); - flag_zeroed = -#ifdef JEMALLOC_SWAP - swap_enabled ? 0 : + flag_unzeroed = +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED + /* + * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous + * mappings, but not for file-backed mappings. + */ +# ifdef JEMALLOC_SWAP + swap_enabled ? CHUNK_MAP_UNZEROED : +# endif + 0; +#else + CHUNK_MAP_UNZEROED; #endif - CHUNK_MAP_ZEROED; /* * If chunk is the spare, temporarily re-allocate it, 1) so that its @@ -627,14 +709,13 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) * run. */ if (chunk == arena->spare) { - assert((chunk->map[arena_chunk_header_npages].bits & - CHUNK_MAP_DIRTY) != 0); + assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) != 0); arena_chunk_alloc(arena); } /* Temporarily allocate all free dirty runs within chunk. */ - for (pageind = arena_chunk_header_npages; pageind < chunk_npages;) { - mapelm = &chunk->map[pageind]; + for (pageind = map_bias; pageind < chunk_npages;) { + mapelm = &chunk->map[pageind-map_bias]; if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) { size_t npages; @@ -646,25 +727,22 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); + mapelm->bits = (npages << PAGE_SHIFT) | + flag_unzeroed | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED; /* * Update internal elements in the page map, so - * that CHUNK_MAP_ZEROED is properly set. - * madvise(..., MADV_DONTNEED) results in - * zero-filled pages for anonymous mappings, - * but not for file-backed mappings. + * that CHUNK_MAP_UNZEROED is properly set. 
*/ - mapelm->bits = (npages << PAGE_SHIFT) | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | - flag_zeroed; for (i = 1; i < npages - 1; i++) { - chunk->map[pageind + i].bits = - flag_zeroed; + chunk->map[pageind+i-map_bias].bits = + flag_unzeroed; } if (npages > 1) { - chunk->map[pageind + npages - 1].bits = - (npages << PAGE_SHIFT) | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | - flag_zeroed; + chunk->map[ + pageind+npages-1-map_bias].bits = + flag_unzeroed | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED; } arena->nactive += npages; @@ -706,8 +784,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) nmadvise = 0; #endif ql_foreach(mapelm, &mapelms, u.ql_link) { - size_t pageind = ((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t); + size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; size_t npages = mapelm->bits >> PAGE_SHIFT; assert(pageind + npages <= chunk_npages); @@ -715,8 +793,17 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) assert(ndirty >= npages); ndirty -= npages; #endif + +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), (npages << PAGE_SHIFT), MADV_DONTNEED); +#elif defined(JEMALLOC_PURGE_MADVISE_FREE) + madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), + (npages << PAGE_SHIFT), MADV_FREE); +#else +# error "No method defined for purging unused dirty pages." +#endif + #ifdef JEMALLOC_STATS nmadvise++; #endif @@ -732,8 +819,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) /* Deallocate runs. */ for (mapelm = ql_first(&mapelms); mapelm != NULL; mapelm = ql_first(&mapelms)) { - size_t pageind = ((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t); + size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << PAGE_SHIFT)); @@ -743,7 +830,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) } static void -arena_purge(arena_t *arena) +arena_purge(arena_t *arena, bool all) { arena_chunk_t *chunk; size_t npurgatory; @@ -757,8 +844,8 @@ arena_purge(arena_t *arena) assert(ndirty == arena->ndirty); #endif assert(arena->ndirty > arena->npurgatory); - assert(arena->ndirty > chunk_npages); - assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty); + assert(arena->ndirty > chunk_npages || all); + assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty || all); #ifdef JEMALLOC_STATS arena->stats.npurge++; @@ -769,8 +856,9 @@ arena_purge(arena_t *arena) * purge, and add the result to arena->npurgatory. This will keep * multiple threads from racing to reduce ndirty below the threshold. 
*/ - npurgatory = (arena->ndirty - arena->npurgatory) - (arena->nactive >> - opt_lg_dirty_mult); + npurgatory = arena->ndirty - arena->npurgatory; + if (all == false) + npurgatory -= arena->nactive >> opt_lg_dirty_mult; arena->npurgatory += npurgatory; while (npurgatory > 0) { @@ -826,6 +914,15 @@ arena_purge(arena_t *arena) } } +void +arena_purge_all(arena_t *arena) +{ + + malloc_mutex_lock(&arena->lock); + arena_purge(arena, true); + malloc_mutex_unlock(&arena->lock); +} + static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) { @@ -836,11 +933,18 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); - assert(run_ind >= arena_chunk_header_npages); + assert(run_ind >= map_bias); assert(run_ind < chunk_npages); - if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0) - size = chunk->map[run_ind].bits & ~PAGE_MASK; - else + if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_LARGE) != 0) { + size = chunk->map[run_ind-map_bias].bits & ~PAGE_MASK; + assert(size == PAGE_SIZE || + (chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + ~PAGE_MASK) == 0); + assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + CHUNK_MAP_LARGE) != 0); + assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + CHUNK_MAP_ALLOCATED) != 0); + } else size = run->bin->run_size; run_pages = (size >> PAGE_SHIFT); arena->nactive -= run_pages; @@ -849,7 +953,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) * The run is dirty if the caller claims to have dirtied it, as well as * if it was already dirty before being allocated. */ - if ((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) != 0) + if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) != 0) dirty = true; flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; runs_avail = dirty ? &arena->runs_avail_dirty : @@ -857,72 +961,91 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) /* Mark pages as unallocated in the chunk map. */ if (dirty) { - chunk->map[run_ind].bits = size | flag_dirty; - chunk->map[run_ind+run_pages-1].bits = size | flag_dirty; + chunk->map[run_ind-map_bias].bits = size | CHUNK_MAP_DIRTY; + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + CHUNK_MAP_DIRTY; chunk->ndirty += run_pages; arena->ndirty += run_pages; } else { - chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits & - CHUNK_MAP_ZEROED); - chunk->map[run_ind+run_pages-1].bits = size | - (chunk->map[run_ind+run_pages-1].bits & CHUNK_MAP_ZEROED); + chunk->map[run_ind-map_bias].bits = size | + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED); + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + (chunk->map[run_ind+run_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED); } /* Try to coalesce forward. */ if (run_ind + run_pages < chunk_npages && - (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_ALLOCATED) == 0 && - (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_DIRTY) == - flag_dirty) { - size_t nrun_size = chunk->map[run_ind+run_pages].bits & + (chunk->map[run_ind+run_pages-map_bias].bits & CHUNK_MAP_ALLOCATED) + == 0 && (chunk->map[run_ind+run_pages-map_bias].bits & + CHUNK_MAP_DIRTY) == flag_dirty) { + size_t nrun_size = chunk->map[run_ind+run_pages-map_bias].bits & ~PAGE_MASK; + size_t nrun_pages = nrun_size >> PAGE_SHIFT; /* * Remove successor from runs_avail; the coalesced run is * inserted later. 
*/ + assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits + & ~PAGE_MASK) == nrun_size); + assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits + & CHUNK_MAP_ALLOCATED) == 0); + assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits + & CHUNK_MAP_DIRTY) == flag_dirty); arena_avail_tree_remove(runs_avail, - &chunk->map[run_ind+run_pages]); + &chunk->map[run_ind+run_pages-map_bias]); size += nrun_size; - run_pages = size >> PAGE_SHIFT; + run_pages += nrun_pages; - assert((chunk->map[run_ind+run_pages-1].bits & ~PAGE_MASK) - == nrun_size); - chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits & - CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind+run_pages-1].bits = size | - (chunk->map[run_ind+run_pages-1].bits & + chunk->map[run_ind-map_bias].bits = size | + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_FLAGS_MASK); } /* Try to coalesce backward. */ - if (run_ind > arena_chunk_header_npages && (chunk->map[run_ind-1].bits & - CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1].bits & + if (run_ind > map_bias && (chunk->map[run_ind-1-map_bias].bits & + CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1-map_bias].bits & CHUNK_MAP_DIRTY) == flag_dirty) { - size_t prun_size = chunk->map[run_ind-1].bits & ~PAGE_MASK; + size_t prun_size = chunk->map[run_ind-1-map_bias].bits & + ~PAGE_MASK; + size_t prun_pages = prun_size >> PAGE_SHIFT; - run_ind -= prun_size >> PAGE_SHIFT; + run_ind -= prun_pages; /* * Remove predecessor from runs_avail; the coalesced run is * inserted later. */ - arena_avail_tree_remove(runs_avail, &chunk->map[run_ind]); + assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) + == prun_size); + assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_ALLOCATED) + == 0); + assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) + == flag_dirty); + arena_avail_tree_remove(runs_avail, + &chunk->map[run_ind-map_bias]); size += prun_size; - run_pages = size >> PAGE_SHIFT; + run_pages += prun_pages; - assert((chunk->map[run_ind].bits & ~PAGE_MASK) == prun_size); - chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits & - CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind+run_pages-1].bits = size | - (chunk->map[run_ind+run_pages-1].bits & + chunk->map[run_ind-map_bias].bits = size | + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_FLAGS_MASK); } /* Insert into runs_avail, now that coalescing is complete. */ - arena_avail_tree_insert(runs_avail, &chunk->map[run_ind]); + assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) == + (chunk->map[run_ind+run_pages-1-map_bias].bits & ~PAGE_MASK)); + assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == + (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_DIRTY)); + arena_avail_tree_insert(runs_avail, &chunk->map[run_ind-map_bias]); if (dirty) { /* @@ -941,8 +1064,8 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) * manipulation checks whether the first run is unallocated and extends * to the end of the chunk. 
*/ - if ((chunk->map[arena_chunk_header_npages].bits & (~PAGE_MASK | - CHUNK_MAP_ALLOCATED)) == arena_maxclass) + if ((chunk->map[0].bits & (~PAGE_MASK | CHUNK_MAP_ALLOCATED)) == + arena_maxclass) arena_chunk_dealloc(arena, chunk); /* @@ -962,18 +1085,40 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, { size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT; - size_t flags = chunk->map[pageind].bits & CHUNK_MAP_FLAGS_MASK; + size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY; assert(oldsize > newsize); /* * Update the chunk map so that arena_run_dalloc() can treat the - * leading run as separately allocated. + * leading run as separately allocated. Set the last element of each + * run first, in case of single-page runs. */ - assert(chunk->map[pageind].bits & CHUNK_MAP_LARGE); - assert(chunk->map[pageind].bits & CHUNK_MAP_ALLOCATED); - chunk->map[pageind].bits = (oldsize - newsize) | flags; - chunk->map[pageind+head_npages].bits = newsize | flags; + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); + chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | + (chunk->map[pageind+head_npages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind-map_bias].bits = (oldsize - newsize) + | flag_dirty | (chunk->map[pageind-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + +#ifdef JEMALLOC_DEBUG + { + size_t tail_npages = newsize >> PAGE_SHIFT; + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & ~PAGE_MASK) == 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & CHUNK_MAP_DIRTY) == flag_dirty); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & CHUNK_MAP_ALLOCATED) != 0); + } +#endif + chunk->map[pageind+head_npages-map_bias].bits = newsize | flag_dirty | + (chunk->map[pageind+head_npages-map_bias].bits & + CHUNK_MAP_FLAGS_MASK) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; arena_run_dalloc(arena, run, false); } @@ -983,20 +1128,40 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty) { size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; - size_t npages = newsize >> PAGE_SHIFT; - size_t flags = chunk->map[pageind].bits & CHUNK_MAP_FLAGS_MASK; + size_t head_npages = newsize >> PAGE_SHIFT; + size_t tail_npages = (oldsize - newsize) >> PAGE_SHIFT; + size_t flag_dirty = chunk->map[pageind-map_bias].bits & + CHUNK_MAP_DIRTY; assert(oldsize > newsize); /* * Update the chunk map so that arena_run_dalloc() can treat the - * trailing run as separately allocated. + * trailing run as separately allocated. Set the last element of each + * run first, in case of single-page runs. 
*/ - assert(chunk->map[pageind].bits & CHUNK_MAP_LARGE); - assert(chunk->map[pageind].bits & CHUNK_MAP_ALLOCATED); - chunk->map[pageind].bits = newsize | flags; - chunk->map[pageind+npages-1].bits = newsize | flags; - chunk->map[pageind+npages].bits = (oldsize - newsize) | flags; + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); + chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | + (chunk->map[pageind+head_npages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind-map_bias].bits = newsize | flag_dirty | + (chunk->map[pageind-map_bias].bits & CHUNK_MAP_UNZEROED) | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + ~PAGE_MASK) == 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + CHUNK_MAP_ALLOCATED) != 0); + chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits = + flag_dirty | + (chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind+head_npages-map_bias].bits = (oldsize - newsize) | + flag_dirty | (chunk->map[pageind+head_npages-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), dirty); @@ -1018,8 +1183,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) arena_run_tree_remove(&bin->runs, mapelm); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)); + pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t))) + map_bias; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); @@ -1039,7 +1204,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) /* Initialize run internals. */ run->bin = bin; run->avail = NULL; - run->next = (void *)(((uintptr_t)run) + + run->next = (void *)((uintptr_t)run + (uintptr_t)bin->reg0_offset); run->nfree = bin->nregs; #ifdef JEMALLOC_DEBUG @@ -1061,7 +1226,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) /* * arena_run_alloc() failed, but another thread may have made - * sufficient memory available while this one dopped bin->lock above, + * sufficient memory available while this one dropped bin->lock above, * so search one more time. 
*/ mapelm = arena_run_tree_first(&bin->runs); @@ -1073,8 +1238,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) arena_run_tree_remove(&bin->runs, mapelm); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)); + pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t))) + map_bias; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); @@ -1105,11 +1270,21 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) assert(bin->runcur->nfree > 0); ret = arena_run_reg_alloc(bin->runcur, bin); if (run != NULL) { - malloc_mutex_unlock(&bin->lock); - malloc_mutex_lock(&arena->lock); - arena_run_dalloc(arena, run, false); - malloc_mutex_unlock(&arena->lock); - malloc_mutex_lock(&bin->lock); + arena_chunk_t *chunk; + + /* + * arena_run_alloc() may have allocated run, or it may + * have pulled it from the bin's run tree. Therefore + * it is unsafe to make any assumptions about how run + * has previously been used, and arena_bin_lower_run() + * must be called, as if a region were just deallocated + * from the run. + */ + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + if (run->nfree == bin->nregs) + arena_dalloc_bin_run(arena, chunk, run, bin); + else + arena_bin_lower_run(arena, chunk, run, bin); } return (ret); } @@ -1424,17 +1599,19 @@ arena_malloc(size_t size, bool zero) /* Only handles large allocations that require more than page alignment. */ void * -arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size) +arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, + bool zero) { void *ret; size_t offset; arena_chunk_t *chunk; assert((size & PAGE_MASK) == 0); - assert((alignment & PAGE_MASK) == 0); + + alignment = PAGE_CEILING(alignment); malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc(arena, alloc_size, true, false); + ret = (void *)arena_run_alloc(arena, alloc_size, true, zero); if (ret == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1482,10 +1659,12 @@ arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size) malloc_mutex_unlock(&arena->lock); #ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); + if (zero == false) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } #endif return (ret); } @@ -1502,8 +1681,8 @@ arena_salloc(const void *ptr) assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); - mapbits = chunk->map[pageind].bits; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapbits = chunk->map[pageind-map_bias].bits; assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + @@ -1535,11 +1714,11 @@ arena_prof_promoted(const void *ptr, size_t size) assert(isalloc(ptr) == PAGE_SIZE); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; binind = small_size2bin[size]; assert(binind < nbins); - chunk->map[pageind].bits = (chunk->map[pageind].bits & - ~CHUNK_MAP_CLASS_MASK) | (binind << CHUNK_MAP_CLASS_SHIFT); + chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & + ~CHUNK_MAP_CLASS_MASK) 
| ((binind+1) << CHUNK_MAP_CLASS_SHIFT); } size_t @@ -1553,8 +1732,8 @@ arena_salloc_demote(const void *ptr) assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); - mapbits = chunk->map[pageind].bits; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapbits = chunk->map[pageind-map_bias].bits; assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + @@ -1569,9 +1748,9 @@ arena_salloc_demote(const void *ptr) assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; if (prof_promote && ret == PAGE_SIZE && (mapbits & - CHUNK_MAP_CLASS_MASK) != CHUNK_MAP_CLASS_MASK) { + CHUNK_MAP_CLASS_MASK) != 0) { size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> - CHUNK_MAP_CLASS_SHIFT); + CHUNK_MAP_CLASS_SHIFT) - 1; assert(binind < nbins); ret = chunk->arena->bins[binind].reg_size; } @@ -1580,144 +1759,12 @@ arena_salloc_demote(const void *ptr) return (ret); } - -static inline unsigned -arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, - size_t size) -{ - unsigned shift, diff, regind; - - assert(run->magic == ARENA_RUN_MAGIC); - - /* - * Avoid doing division with a variable divisor if possible. Using - * actual division here can reduce allocator throughput by over 20%! - */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset); - - /* Rescale (factor powers of 2 out of the numerator and denominator). */ - shift = ffs(size) - 1; - diff >>= shift; - size >>= shift; - - if (size == 1) { - /* The divisor was a power of 2. */ - regind = diff; - } else { - /* - * To divide by a number D that is not a power of two we - * multiply by (2^21 / D) and then right shift by 21 positions. - * - * X / D - * - * becomes - * - * (X * size_invs[D - 3]) >> SIZE_INV_SHIFT - * - * We can omit the first three elements, because we never - * divide by 0, and 1 and 2 are both powers of two, which are - * handled above. 
- */ -#define SIZE_INV_SHIFT 21 -#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) - static const unsigned size_invs[] = { - SIZE_INV(3), - SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), - SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), - SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), - SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), - SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), - SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), - SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) - }; - - if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2)) - regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT; - else - regind = diff / size; -#undef SIZE_INV -#undef SIZE_INV_SHIFT - } - assert(diff == regind * size); - assert(regind < bin->nregs); - - return (regind); -} - -prof_ctx_t * -arena_prof_ctx_get(const void *ptr) -{ - prof_ctx_t *ret; - arena_chunk_t *chunk; - size_t pageind, mapbits; - - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); - mapbits = chunk->map[pageind].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - if (prof_promote) - ret = (prof_ctx_t *)(uintptr_t)1U; - else { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); - arena_bin_t *bin = run->bin; - unsigned regind; - - assert(run->magic == ARENA_RUN_MAGIC); - regind = arena_run_regind(run, bin, ptr, bin->reg_size); - ret = *(prof_ctx_t **)((uintptr_t)run + - bin->ctx0_offset + (regind * - sizeof(prof_ctx_t *))); - } - } else - ret = chunk->map[pageind].prof_ctx; - - return (ret); -} - -void -arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) -{ - arena_chunk_t *chunk; - size_t pageind, mapbits; - - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); - mapbits = chunk->map[pageind].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - if (prof_promote == false) { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); - arena_bin_t *bin = run->bin; - unsigned regind; - - assert(run->magic == ARENA_RUN_MAGIC); - regind = arena_run_regind(run, bin, ptr, bin->reg_size); - - *((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset - + (regind * sizeof(prof_ctx_t *)))) = ctx; - } else - assert((uintptr_t)ctx == (uintptr_t)1U); - } else - chunk->map[pageind].prof_ctx = ctx; -} #endif static void -arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, +arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { - size_t npages, run_ind, past; /* Dissociate run from bin. 
*/ if (run == bin->runcur) @@ -1725,7 +1772,8 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, else if (bin->nregs != 1) { size_t run_pageind = (((uintptr_t)run - (uintptr_t)chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = &chunk->map[run_pageind]; + arena_chunk_map_t *run_mapelm = + &chunk->map[run_pageind-map_bias]; /* * This block's conditional is necessary because if the run * only contains one region, then it never gets inserted into @@ -1733,13 +1781,24 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, */ arena_run_tree_remove(&bin->runs, run_mapelm); } +} + +static void +arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) +{ + size_t npages, run_ind, past; + + assert(run != bin->runcur); + assert(arena_run_tree_search(&bin->runs, &chunk->map[ + (((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL); malloc_mutex_unlock(&bin->lock); /******************************/ npages = bin->run_size >> PAGE_SHIFT; run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); - past = (size_t)(((uintptr_t)run->next - (uintptr_t)1U - - (uintptr_t)chunk) >> PAGE_SHIFT) + 1; + past = (size_t)((PAGE_CEILING((uintptr_t)run->next) - (uintptr_t)chunk) + >> PAGE_SHIFT); malloc_mutex_lock(&arena->lock); /* @@ -1747,19 +1806,21 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * trim the clean pages before deallocating the dirty portion of the * run. */ - if ((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) == 0 && past - run_ind - < npages) { + if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == 0 && past + - run_ind < npages) { /* * Trim clean pages. Convert to large run beforehand. Set the * last map element first, in case this is a one-page run. */ - chunk->map[run_ind+npages-1].bits = CHUNK_MAP_LARGE | - (chunk->map[run_ind].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind].bits = bin->run_size | CHUNK_MAP_LARGE | - (chunk->map[run_ind].bits & CHUNK_MAP_FLAGS_MASK); + chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE | + (chunk->map[run_ind+npages-1-map_bias].bits & + CHUNK_MAP_FLAGS_MASK); + chunk->map[run_ind-map_bias].bits = bin->run_size | + CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & + CHUNK_MAP_FLAGS_MASK); arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT), - ((npages - (past - run_ind)) << PAGE_SHIFT), false); - npages = past - run_ind; + ((past - run_ind) << PAGE_SHIFT), false); + /* npages = past - run_ind; */ } #ifdef JEMALLOC_DEBUG run->magic = 0; @@ -1773,6 +1834,42 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, #endif } +static void +arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) +{ + + /* + * Make sure that bin->runcur always refers to the lowest non-full run, + * if one exists. + */ + if (bin->runcur == NULL) + bin->runcur = run; + else if ((uintptr_t)run < (uintptr_t)bin->runcur) { + /* Switch runcur. */ + if (bin->runcur->nfree > 0) { + arena_chunk_t *runcur_chunk = + CHUNK_ADDR2BASE(bin->runcur); + size_t runcur_pageind = (((uintptr_t)bin->runcur - + (uintptr_t)runcur_chunk)) >> PAGE_SHIFT; + arena_chunk_map_t *runcur_mapelm = + &runcur_chunk->map[runcur_pageind-map_bias]; + + /* Insert runcur. 
*/ + arena_run_tree_insert(&bin->runs, runcur_mapelm); + } + bin->runcur = run; + } else { + size_t run_pageind = (((uintptr_t)run - + (uintptr_t)chunk)) >> PAGE_SHIFT; + arena_chunk_map_t *run_mapelm = + &chunk->map[run_pageind-map_bias]; + + assert(arena_run_tree_search(&bin->runs, run_mapelm) == NULL); + arena_run_tree_insert(&bin->runs, run_mapelm); + } +} + void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm) @@ -1784,7 +1881,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t size; #endif - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); assert(run->magic == ARENA_RUN_MAGIC); @@ -1799,43 +1896,11 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, #endif arena_run_reg_dalloc(run, ptr); - - if (run->nfree == bin->nregs) + if (run->nfree == bin->nregs) { + arena_dissociate_bin_run(chunk, run, bin); arena_dalloc_bin_run(arena, chunk, run, bin); - else if (run->nfree == 1 && run != bin->runcur) { - /* - * Make sure that bin->runcur always refers to the lowest - * non-full run, if one exists. - */ - if (bin->runcur == NULL) - bin->runcur = run; - else if ((uintptr_t)run < (uintptr_t)bin->runcur) { - /* Switch runcur. */ - if (bin->runcur->nfree > 0) { - arena_chunk_t *runcur_chunk = - CHUNK_ADDR2BASE(bin->runcur); - size_t runcur_pageind = - (((uintptr_t)bin->runcur - - (uintptr_t)runcur_chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *runcur_mapelm = - &runcur_chunk->map[runcur_pageind]; - - /* Insert runcur. */ - arena_run_tree_insert(&bin->runs, - runcur_mapelm); - } - bin->runcur = run; - } else { - size_t run_pageind = (((uintptr_t)run - - (uintptr_t)chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = - &chunk->map[run_pageind]; - - assert(arena_run_tree_search(&bin->runs, run_mapelm) == - NULL); - arena_run_tree_insert(&bin->runs, run_mapelm); - } - } + } else if (run->nfree == 1 && run != bin->runcur) + arena_bin_lower_run(arena, chunk, run, bin); #ifdef JEMALLOC_STATS bin->stats.allocated -= size; @@ -1908,7 +1973,7 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) #if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - size_t size = chunk->map[pageind].bits & ~PAGE_MASK; + size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK; #endif #ifdef JEMALLOC_FILL @@ -1930,7 +1995,7 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t size, size_t oldsize) + size_t oldsize, size_t size) { assert(size < oldsize); @@ -1965,50 +2030,71 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t size, size_t oldsize) + size_t oldsize, size_t size, size_t extra, bool zero) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; size_t npages = oldsize >> PAGE_SHIFT; + size_t followsize; - assert(oldsize == (chunk->map[pageind].bits & ~PAGE_MASK)); + assert(oldsize == (chunk->map[pageind-map_bias].bits & ~PAGE_MASK)); /* Try to extend the run. 
*/ - assert(size > oldsize); + assert(size + extra > oldsize); malloc_mutex_lock(&arena->lock); - if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits - & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits & - ~PAGE_MASK) >= size - oldsize) { + if (pageind + npages < chunk_npages && + (chunk->map[pageind+npages-map_bias].bits + & CHUNK_MAP_ALLOCATED) == 0 && (followsize = + chunk->map[pageind+npages-map_bias].bits & ~PAGE_MASK) >= size - + oldsize) { /* * The next run is available and sufficiently large. Split the * following run, then merge the first part with the existing * allocation. */ + size_t flag_dirty; + size_t splitsize = (oldsize + followsize <= size + extra) + ? followsize : size + extra - oldsize; arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk + - ((pageind+npages) << PAGE_SHIFT)), size - oldsize, true, - false); + ((pageind+npages) << PAGE_SHIFT)), splitsize, true, zero); - chunk->map[pageind].bits = size | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; - chunk->map[pageind+npages].bits = CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; + size = oldsize + splitsize; + npages = size >> PAGE_SHIFT; -#ifdef JEMALLOC_STATS - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; + /* + * Mark the extended run as dirty if either portion of the run + * was dirty before allocation. This is rather pedantic, + * because there's not actually any sequence of events that + * could cause the resulting run to be passed to + * arena_run_dalloc() with the dirty argument set to false + * (which is when dirty flag consistency would really matter). + */ + flag_dirty = (chunk->map[pageind-map_bias].bits & + CHUNK_MAP_DIRTY) | + (chunk->map[pageind+npages-1-map_bias].bits & + CHUNK_MAP_DIRTY); + chunk->map[pageind-map_bias].bits = size | flag_dirty + | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind+npages-1-map_bias].bits = flag_dirty | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns; - } +#ifdef JEMALLOC_STATS + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= oldsize; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; + + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = + arena->stats.lstats[(size >> PAGE_SHIFT) - + 1].curruns; + } #endif malloc_mutex_unlock(&arena->lock); return (false); @@ -2023,11 +2109,12 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, * always fail if growing an object, 
and the following run is already in use. */ static bool -arena_ralloc_large(void *ptr, size_t size, size_t oldsize) +arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, + bool zero) { size_t psize; - psize = PAGE_CEILING(size); + psize = PAGE_CEILING(size + extra); if (psize == oldsize) { /* Same size class. */ #ifdef JEMALLOC_FILL @@ -2053,14 +2140,15 @@ arena_ralloc_large(void *ptr, size_t size, size_t oldsize) oldsize - size); } #endif - arena_ralloc_large_shrink(arena, chunk, ptr, psize, - oldsize); + arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, + psize); return (false); } else { bool ret = arena_ralloc_large_grow(arena, chunk, ptr, - psize, oldsize); + oldsize, PAGE_CEILING(size), + psize - PAGE_CEILING(size), zero); #ifdef JEMALLOC_FILL - if (ret == false && opt_zero) { + if (ret == false && zero == false && opt_zero) { memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize); } @@ -2071,49 +2159,89 @@ arena_ralloc_large(void *ptr, size_t size, size_t oldsize) } void * -arena_ralloc(void *ptr, size_t size, size_t oldsize) +arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, + bool zero) { - void *ret; - size_t copysize; - /* Try to avoid moving the allocation. */ + /* + * Avoid moving the allocation if the size class can be left the same. + */ if (oldsize <= arena_maxclass) { if (oldsize <= small_maxclass) { - if (size <= small_maxclass && small_size2bin[size] == - small_size2bin[oldsize]) - goto IN_PLACE; + assert(choose_arena()->bins[small_size2bin[ + oldsize]].reg_size == oldsize); + if ((size + extra <= small_maxclass && + small_size2bin[size + extra] == + small_size2bin[oldsize]) || (size <= oldsize && + size + extra >= oldsize)) { +#ifdef JEMALLOC_FILL + if (opt_junk && size < oldsize) { + memset((void *)((uintptr_t)ptr + size), + 0x5a, oldsize - size); + } +#endif + return (ptr); + } } else { assert(size <= arena_maxclass); - if (size > small_maxclass) { - if (arena_ralloc_large(ptr, size, oldsize) == - false) + if (size + extra > small_maxclass) { + if (arena_ralloc_large(ptr, oldsize, size, + extra, zero) == false) return (ptr); } } } + /* Reallocation would require a move. */ + return (NULL); +} + +void * +arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) +{ + void *ret; + size_t copysize; + + /* Try to avoid moving the allocation. */ + ret = arena_ralloc_no_move(ptr, oldsize, size, extra, zero); + if (ret != NULL) + return (ret); + + /* - * If we get here, then size and oldsize are different enough that we - * need to move the object. In that case, fall back to allocating new - * space and copying. + * size and oldsize are different enough that we need to move the + * object. In that case, fall back to allocating new space and + * copying. */ - ret = arena_malloc(size, false); - if (ret == NULL) - return (NULL); + if (alignment != 0) + ret = ipalloc(size + extra, alignment, zero); + else + ret = arena_malloc(size + extra, zero); + + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, this time without extra. */ + if (alignment != 0) + ret = ipalloc(size, alignment, zero); + else + ret = arena_malloc(size, zero); - /* Junk/zero-filling were already done by arena_malloc(). */ + if (ret == NULL) + return (NULL); + } + + /* Junk/zero-filling were already done by ipalloc()/arena_malloc(). */ + + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. 
+ */ copysize = (size < oldsize) ? size : oldsize; memcpy(ret, ptr, copysize); idalloc(ptr); return (ret); -IN_PLACE: -#ifdef JEMALLOC_FILL - if (opt_junk && size < oldsize) - memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size); - else if (opt_zero && size > oldsize) - memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize); -#endif - return (ptr); } bool @@ -2239,26 +2367,6 @@ arena_new(arena_t *arena, unsigned ind) return (false); } -#ifdef JEMALLOC_TINY -/* Compute the smallest power of 2 that is >= x. */ -static size_t -pow2_ceil(size_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; -#if (SIZEOF_PTR == 8) - x |= x >> 32; -#endif - x++; - return (x); -} -#endif - #ifdef JEMALLOC_DEBUG static void small_size2bin_validate(void) @@ -2381,6 +2489,7 @@ bool arena_boot(void) { size_t header_size; + unsigned i; /* Set variables according to the value of opt_lg_[qc]space_max. */ qspace_max = (1U << opt_lg_qspace_max); @@ -2420,7 +2529,7 @@ arena_boot(void) if (nbins > 255) { char line_buf[UMAX2S_BUFSIZE]; malloc_write("<jemalloc>: Too many small size classes ("); - malloc_write(umax2s(nbins, 10, line_buf)); + malloc_write(u2s(nbins, 10, line_buf)); malloc_write(" > max 255)\n"); abort(); } @@ -2429,7 +2538,7 @@ arena_boot(void) if (nbins > 256) { char line_buf[UMAX2S_BUFSIZE]; malloc_write("<jemalloc>: Too many small size classes ("); - malloc_write(umax2s(nbins, 10, line_buf)); + malloc_write(u2s(nbins, 10, line_buf)); malloc_write(" > max 256)\n"); abort(); } @@ -2439,13 +2548,26 @@ arena_boot(void) /* * Compute the header size such that it is large enough to contain the - * page map. + * page map. The page map is biased to omit entries for the header + * itself, so some iteration is necessary to compute the map bias. + * + * 1) Compute safe header_size and map_bias values that include enough + * space for an unbiased page map. + * 2) Refine map_bias based on (1) to omit the header pages in the page + * map. The resulting map_bias may be one too small. + * 3) Refine map_bias based on (2). The result will be >= the result + * from (2), and will always be correct. */ - header_size = sizeof(arena_chunk_t) + - (sizeof(arena_chunk_map_t) * (chunk_npages - 1)); - arena_chunk_header_npages = (header_size >> PAGE_SHIFT) + - ((header_size & PAGE_MASK) != 0); - arena_maxclass = chunksize - (arena_chunk_header_npages << PAGE_SHIFT); + map_bias = 0; + for (i = 0; i < 3; i++) { + header_size = offsetof(arena_chunk_t, map) + + (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias)); + map_bias = (header_size >> PAGE_SHIFT) + ((header_size & + PAGE_MASK) != 0); + } + assert(map_bias > 0); + + arena_maxclass = chunksize - (map_bias << PAGE_SHIFT); return (false); } diff --git a/jemalloc/src/base.c b/jemalloc/src/base.c index 605197e..cc85e84 100644 --- a/jemalloc/src/base.c +++ b/jemalloc/src/base.c @@ -32,7 +32,7 @@ base_pages_alloc(size_t minsize) assert(minsize != 0); csize = CHUNK_CEILING(minsize); zero = false; - base_pages = chunk_alloc(csize, &zero); + base_pages = chunk_alloc(csize, true, &zero); if (base_pages == NULL) return (true); base_next_addr = base_pages; diff --git a/jemalloc/src/chunk.c b/jemalloc/src/chunk.c index e6e3bcd..00bf50a 100644 --- a/jemalloc/src/chunk.c +++ b/jemalloc/src/chunk.c @@ -14,11 +14,15 @@ malloc_mutex_t chunks_mtx; chunk_stats_t stats_chunks; #endif +#ifdef JEMALLOC_IVSALLOC +rtree_t *chunks_rtree; +#endif + /* Various chunk-related settings. */ size_t chunksize; size_t chunksize_mask; /* (chunksize - 1). 
*/ size_t chunk_npages; -size_t arena_chunk_header_npages; +size_t map_bias; size_t arena_maxclass; /* Max size class for arenas. */ /******************************************************************************/ @@ -30,7 +34,7 @@ size_t arena_maxclass; /* Max size class for arenas. */ * advantage of them if they are returned. */ void * -chunk_alloc(size_t size, bool *zero) +chunk_alloc(size_t size, bool base, bool *zero) { void *ret; @@ -63,10 +67,18 @@ chunk_alloc(size_t size, bool *zero) /* All strategies for allocation failed. */ ret = NULL; RETURN: +#ifdef JEMALLOC_IVSALLOC + if (base == false && ret != NULL) { + if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { + chunk_dealloc(ret, size); + return (NULL); + } + } +#endif #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) if (ret != NULL) { # ifdef JEMALLOC_PROF - bool udump; + bool gdump; # endif malloc_mutex_lock(&chunks_mtx); # ifdef JEMALLOC_STATS @@ -76,17 +88,17 @@ RETURN: if (stats_chunks.curchunks > stats_chunks.highchunks) { stats_chunks.highchunks = stats_chunks.curchunks; # ifdef JEMALLOC_PROF - udump = true; + gdump = true; # endif } # ifdef JEMALLOC_PROF else - udump = false; + gdump = false; # endif malloc_mutex_unlock(&chunks_mtx); # ifdef JEMALLOC_PROF - if (opt_prof && opt_prof_udump && udump) - prof_udump(); + if (opt_prof && opt_prof_gdump && gdump) + prof_gdump(); # endif } #endif @@ -104,6 +116,9 @@ chunk_dealloc(void *chunk, size_t size) assert(size != 0); assert((size & chunksize_mask) == 0); +#ifdef JEMALLOC_IVSALLOC + rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); +#endif #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) malloc_mutex_lock(&chunks_mtx); stats_chunks.curchunks -= (size / chunksize); @@ -126,21 +141,27 @@ chunk_boot(void) { /* Set variables according to the value of opt_lg_chunk. */ - chunksize = (1LU << opt_lg_chunk); + chunksize = (ZU(1) << opt_lg_chunk); assert(chunksize >= PAGE_SIZE); chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> PAGE_SHIFT); +#ifdef JEMALLOC_IVSALLOC + chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk); + if (chunks_rtree == NULL) + return (true); +#endif #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) if (malloc_mutex_init(&chunks_mtx)) return (true); memset(&stats_chunks, 0, sizeof(chunk_stats_t)); #endif - #ifdef JEMALLOC_SWAP if (chunk_swap_boot()) return (true); #endif + if (chunk_mmap_boot()) + return (true); #ifdef JEMALLOC_DSS if (chunk_dss_boot()) return (true); diff --git a/jemalloc/src/chunk_mmap.c b/jemalloc/src/chunk_mmap.c index d9f9e86..bc36755 100644 --- a/jemalloc/src/chunk_mmap.c +++ b/jemalloc/src/chunk_mmap.c @@ -6,26 +6,30 @@ /* * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and - * potentially avoid some system calls. We can get away without TLS here, - * since the state of mmap_unaligned only affects performance, rather than - * correct function. + * potentially avoid some system calls. 
*/ -static #ifndef NO_TLS - __thread +static __thread bool mmap_unaligned_tls + JEMALLOC_ATTR(tls_model("initial-exec")); +#define MMAP_UNALIGNED_GET() mmap_unaligned_tls +#define MMAP_UNALIGNED_SET(v) do { \ + mmap_unaligned_tls = (v); \ +} while (0) +#else +static pthread_key_t mmap_unaligned_tsd; +#define MMAP_UNALIGNED_GET() ((bool)pthread_getspecific(mmap_unaligned_tsd)) +#define MMAP_UNALIGNED_SET(v) do { \ + pthread_setspecific(mmap_unaligned_tsd, (void *)(v)); \ +} while (0) #endif - bool mmap_unaligned -#ifndef NO_TLS - JEMALLOC_ATTR(tls_model("initial-exec")) -#endif - ; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ static void *pages_map(void *addr, size_t size, bool noreserve); static void pages_unmap(void *addr, size_t size); -static void *chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve); +static void *chunk_alloc_mmap_slow(size_t size, bool unaligned, + bool noreserve); static void *chunk_alloc_mmap_internal(size_t size, bool noreserve); /******************************************************************************/ @@ -54,9 +58,9 @@ pages_map(void *addr, size_t size, bool noreserve) * We succeeded in mapping memory, but not in the right place. */ if (munmap(ret, size) == -1) { - char buf[STRERROR_BUF]; + char buf[BUFERROR_BUF]; - strerror_r(errno, buf, sizeof(buf)); + buferror(errno, buf, sizeof(buf)); malloc_write("<jemalloc>: Error in munmap(): "); malloc_write(buf); malloc_write("\n"); @@ -76,9 +80,9 @@ pages_unmap(void *addr, size_t size) { if (munmap(addr, size) == -1) { - char buf[STRERROR_BUF]; + char buf[BUFERROR_BUF]; - strerror_r(errno, buf, sizeof(buf)); + buferror(errno, buf, sizeof(buf)); malloc_write("<jemalloc>: Error in munmap(): "); malloc_write(buf); malloc_write("\n"); @@ -128,7 +132,7 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve) * method. */ if (unaligned == false) - mmap_unaligned = false; + MMAP_UNALIGNED_SET(false); return (ret); } @@ -166,7 +170,7 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) * fast method next time. */ - if (mmap_unaligned == false) { + if (MMAP_UNALIGNED_GET() == false) { size_t offset; ret = pages_map(NULL, size, noreserve); @@ -175,7 +179,7 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) offset = CHUNK_ADDR2OFFSET(ret); if (offset != 0) { - mmap_unaligned = true; + MMAP_UNALIGNED_SET(true); /* Try to extend chunk boundary. */ if (pages_map((void *)((uintptr_t)ret + size), chunksize - offset, noreserve) == NULL) { @@ -184,7 +188,8 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) * the reliable-but-expensive method. */ pages_unmap(ret, size); - ret = chunk_alloc_mmap_slow(size, true, noreserve); + ret = chunk_alloc_mmap_slow(size, true, + noreserve); } else { /* Clean up unneeded leading space. 
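/*
 * [Editor's aside: illustrative sketch, not part of this commit.]  The
 * MMAP_UNALIGNED_GET()/MMAP_UNALIGNED_SET() pair above hides whether the
 * flag lives in a fast __thread variable or in a pthread_getspecific()
 * slot; under NO_TLS the boolean is carried directly in the void * value,
 * so no per-thread storage needs to be allocated.  The same pattern for a
 * generic per-thread flag (all names here are hypothetical):
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

#ifndef NO_TLS
static __thread bool sketch_flag_tls;
#  define SKETCH_FLAG_GET()	sketch_flag_tls
#  define SKETCH_FLAG_SET(v)	do { sketch_flag_tls = (v); } while (0)
#else
static pthread_key_t sketch_flag_tsd;	/* pthread_key_create() at boot. */
#  define SKETCH_FLAG_GET()						\
	((bool)(uintptr_t)pthread_getspecific(sketch_flag_tsd))
#  define SKETCH_FLAG_SET(v)	do {					\
	pthread_setspecific(sketch_flag_tsd, (void *)(uintptr_t)(v));	\
} while (0)
#endif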
*/ pages_unmap(ret, chunksize - offset); @@ -216,3 +221,17 @@ chunk_dealloc_mmap(void *chunk, size_t size) pages_unmap(chunk, size); } + +bool +chunk_mmap_boot(void) +{ + +#ifdef NO_TLS + if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) { + malloc_write("<jemalloc>: Error in pthread_key_create()\n"); + return (true); + } +#endif + + return (false); +} diff --git a/jemalloc/src/chunk_swap.c b/jemalloc/src/chunk_swap.c index ed9e414..ee038ba 100644 --- a/jemalloc/src/chunk_swap.c +++ b/jemalloc/src/chunk_swap.c @@ -294,9 +294,10 @@ chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed) void *addr = mmap((void *)((uintptr_t)vaddr + voff), sizes[i], PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fds[i], 0); if (addr == MAP_FAILED) { - char buf[STRERROR_BUF]; + char buf[BUFERROR_BUF]; - strerror_r(errno, buf, sizeof(buf)); + + buferror(errno, buf, sizeof(buf)); malloc_write( "<jemalloc>: Error in mmap(..., MAP_FIXED, ...): "); malloc_write(buf); @@ -304,7 +305,7 @@ chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed) if (opt_abort) abort(); if (munmap(vaddr, voff) == -1) { - strerror_r(errno, buf, sizeof(buf)); + buferror(errno, buf, sizeof(buf)); malloc_write("<jemalloc>: Error in munmap(): "); malloc_write(buf); malloc_write("\n"); diff --git a/jemalloc/src/ckh.c b/jemalloc/src/ckh.c index a0c4162..682a8db 100644 --- a/jemalloc/src/ckh.c +++ b/jemalloc/src/ckh.c @@ -263,13 +263,12 @@ ckh_grow(ckh_t *ckh) lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS; while (true) { lg_curcells++; - tab = (ckhc_t *) ipalloc((ZU(1) << LG_CACHELINE), - sizeof(ckhc_t) << lg_curcells); + tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells, + ZU(1) << LG_CACHELINE, true); if (tab == NULL) { ret = true; goto RETURN; } - memset(tab, 0, sizeof(ckhc_t) << lg_curcells); /* Swap in new table. */ ttab = ckh->tab; ckh->tab = tab; @@ -305,8 +304,8 @@ ckh_shrink(ckh_t *ckh) */ lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; - tab = (ckhc_t *)ipalloc((ZU(1) << LG_CACHELINE), - sizeof(ckhc_t) << lg_curcells); + tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells, + ZU(1) << LG_CACHELINE, true); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -314,7 +313,6 @@ ckh_shrink(ckh_t *ckh) */ return; } - memset(tab, 0, sizeof(ckhc_t) << lg_curcells); /* Swap in new table. 
*/ ttab = ckh->tab; ckh->tab = tab; @@ -377,13 +375,12 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ckh->hash = hash; ckh->keycomp = keycomp; - ckh->tab = (ckhc_t *)ipalloc((ZU(1) << LG_CACHELINE), - sizeof(ckhc_t) << lg_mincells); + ckh->tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_mincells, + (ZU(1) << LG_CACHELINE), true); if (ckh->tab == NULL) { ret = true; goto RETURN; } - memset(ckh->tab, 0, sizeof(ckhc_t) << lg_mincells); #ifdef JEMALLOC_DEBUG ckh->magic = CKH_MAGIG; @@ -570,12 +567,21 @@ ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, { size_t ret1, ret2; uint64_t h; + union { + const void *v; + uint64_t i; + } u; assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); assert(hash1 != NULL); assert(hash2 != NULL); - h = hash(&key, sizeof(void *), 0xd983396e68886082LLU); + assert(sizeof(u.v) == sizeof(u.i)); +#if (LG_SIZEOF_PTR != LG_SIZEOF_INT) + u.i = 0; +#endif + u.v = key; + h = hash(&u.i, sizeof(u.i), 0xd983396e68886082LLU); if (minbits <= 32) { /* * Avoid doing multiple hashes, since a single hash provides @@ -586,7 +592,7 @@ ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, } else { assert(SIZEOF_PTR == 8); ret1 = h; - ret2 = hash(&key, sizeof(void *), 0x5e2be9aff8709a5dLLU); + ret2 = hash(&u.i, sizeof(u.i), 0x5e2be9aff8709a5dLLU); } *hash1 = ret1; diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c index ffb732d..c83ee4f 100644 --- a/jemalloc/src/ctl.c +++ b/jemalloc/src/ctl.c @@ -41,6 +41,11 @@ CTL_PROTO(epoch) #ifdef JEMALLOC_TCACHE CTL_PROTO(tcache_flush) #endif +CTL_PROTO(thread_arena) +#ifdef JEMALLOC_STATS +CTL_PROTO(thread_allocated) +CTL_PROTO(thread_deallocated) +#endif CTL_PROTO(config_debug) CTL_PROTO(config_dss) CTL_PROTO(config_dynamic_page_shift) @@ -57,8 +62,15 @@ CTL_PROTO(config_tiny) CTL_PROTO(config_tls) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) +CTL_PROTO(opt_lg_qspace_max) +CTL_PROTO(opt_lg_cspace_max) +CTL_PROTO(opt_lg_chunk) +CTL_PROTO(opt_narenas) +CTL_PROTO(opt_lg_dirty_mult) +CTL_PROTO(opt_stats_print) #ifdef JEMALLOC_FILL CTL_PROTO(opt_junk) +CTL_PROTO(opt_zero) #endif #ifdef JEMALLOC_SYSV CTL_PROTO(opt_sysv) @@ -66,27 +78,22 @@ CTL_PROTO(opt_sysv) #ifdef JEMALLOC_XMALLOC CTL_PROTO(opt_xmalloc) #endif -#ifdef JEMALLOC_ZERO -CTL_PROTO(opt_zero) -#endif #ifdef JEMALLOC_TCACHE CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_gc_sweep) #endif #ifdef JEMALLOC_PROF CTL_PROTO(opt_prof) +CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) CTL_PROTO(opt_lg_prof_bt_max) CTL_PROTO(opt_lg_prof_sample) CTL_PROTO(opt_lg_prof_interval) -CTL_PROTO(opt_prof_udump) +CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_leak) +CTL_PROTO(opt_prof_accum) +CTL_PROTO(opt_lg_prof_tcmax) #endif -CTL_PROTO(opt_stats_print) -CTL_PROTO(opt_lg_qspace_max) -CTL_PROTO(opt_lg_cspace_max) -CTL_PROTO(opt_lg_dirty_mult) -CTL_PROTO(opt_lg_chunk) #ifdef JEMALLOC_SWAP CTL_PROTO(opt_overcommit) #endif @@ -125,6 +132,7 @@ CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) #endif CTL_PROTO(arenas_nlruns) +CTL_PROTO(arenas_purge) #ifdef JEMALLOC_PROF CTL_PROTO(prof_active) CTL_PROTO(prof_dump) @@ -210,6 +218,15 @@ static const ctl_node_t tcache_node[] = { }; #endif +static const ctl_node_t thread_node[] = { + {NAME("arena"), CTL(thread_arena)} +#ifdef JEMALLOC_STATS + , + {NAME("allocated"), CTL(thread_allocated)}, + {NAME("deallocated"), CTL(thread_deallocated)} +#endif +}; + static const ctl_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("dss"), CTL(config_dss)}, @@ -230,36 +247,43 @@ static const 
ctl_node_t config_node[] = { static const ctl_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, + {NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)}, + {NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)}, + {NAME("lg_chunk"), CTL(opt_lg_chunk)}, + {NAME("narenas"), CTL(opt_narenas)}, + {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, + {NAME("stats_print"), CTL(opt_stats_print)} #ifdef JEMALLOC_FILL + , {NAME("junk"), CTL(opt_junk)}, + {NAME("zero"), CTL(opt_zero)} #endif #ifdef JEMALLOC_SYSV - {NAME("sysv"), CTL(opt_sysv)}, + , + {NAME("sysv"), CTL(opt_sysv)} #endif #ifdef JEMALLOC_XMALLOC - {NAME("xmalloc"), CTL(opt_xmalloc)}, -#endif -#ifdef JEMALLOC_ZERO - {NAME("zero"), CTL(opt_zero)}, + , + {NAME("xmalloc"), CTL(opt_xmalloc)} #endif #ifdef JEMALLOC_TCACHE + , {NAME("tcache"), CTL(opt_tcache)}, - {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)}, + {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)} #endif #ifdef JEMALLOC_PROF + , {NAME("prof"), CTL(opt_prof)}, + {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, {NAME("lg_prof_bt_max"), CTL(opt_lg_prof_bt_max)}, {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, - {NAME("prof_udump"), CTL(opt_prof_udump)}, + {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, + {NAME("prof_accum"), CTL(opt_prof_accum)}, + {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)} #endif - {NAME("stats_print"), CTL(opt_stats_print)}, - {NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)}, - {NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)}, - {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, - {NAME("lg_chunk"), CTL(opt_lg_chunk)} #ifdef JEMALLOC_SWAP , {NAME("overcommit"), CTL(opt_overcommit)} @@ -321,7 +345,8 @@ static const ctl_node_t arenas_node[] = { #endif {NAME("bin"), CHILD(arenas_bin)}, {NAME("nlruns"), CTL(arenas_nlruns)}, - {NAME("lrun"), CHILD(arenas_lrun)} + {NAME("lrun"), CHILD(arenas_lrun)}, + {NAME("purge"), CTL(arenas_purge)} }; #ifdef JEMALLOC_PROF @@ -448,6 +473,7 @@ static const ctl_node_t root_node[] = { #ifdef JEMALLOC_TCACHE {NAME("tcache"), CHILD(tcache)}, #endif + {NAME("thread"), CHILD(thread)}, {NAME("config"), CHILD(config)}, {NAME("opt"), CHILD(opt)}, {NAME("arenas"), CHILD(arenas)}, @@ -1028,18 +1054,61 @@ tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, VOID(); - tcache = tcache_tls; + tcache = TCACHE_GET(); if (tcache == NULL) { ret = 0; goto RETURN; } tcache_destroy(tcache); - tcache_tls = NULL; + TCACHE_SET(NULL); + + ret = 0; +RETURN: + return (ret); +} +#endif + +static int +thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + unsigned newind, oldind; + + newind = oldind = choose_arena()->ind; + WRITE(oldind, unsigned); + READ(newind, unsigned); + if (newind != oldind) { + arena_t *arena; + + if (newind >= narenas) { + /* New arena index is out of range. */ + ret = EFAULT; + goto RETURN; + } + + /* Initialize arena if necessary. */ + malloc_mutex_lock(&arenas_lock); + if ((arena = arenas[newind]) == NULL) + arena = arenas_extend(newind); + malloc_mutex_unlock(&arenas_lock); + if (arena == NULL) { + ret = EAGAIN; + goto RETURN; + } + + /* Set new arena association. 
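/*
 * [Editor's aside: illustrative usage sketch, not part of this commit.]
 * The new "thread" mallctl namespace above lets a thread rebind itself to
 * a particular arena and read its own allocation counters.  Example
 * caller, assuming an unprefixed build (with a configured prefix the
 * public names gain it, as the JEMALLOC_P() wrappers suggest):
 */
#include <stdint.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

static void
sketch_thread_ctls(void)
{
	unsigned old_ind, new_ind = 1;	/* Requested index must be < narenas. */
	uint64_t allocated;
	size_t usz = sizeof(unsigned), asz = sizeof(uint64_t);

	/* Returns the previous binding via oldp, installs new_ind via newp. */
	if (mallctl("thread.arena", &old_ind, &usz, &new_ind,
	    sizeof(new_ind)) != 0)
		fprintf(stderr, "thread.arena failed\n");

	/* Cumulative bytes allocated by this thread (JEMALLOC_STATS builds). */
	if (mallctl("thread.allocated", &allocated, &asz, NULL, 0) == 0)
		printf("allocated: %llu\n", (unsigned long long)allocated);
}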
*/ + ARENA_SET(arena); + } ret = 0; RETURN: return (ret); } + +#ifdef JEMALLOC_STATS +CTL_RO_GEN(thread_allocated, ALLOCATED_GET(), uint64_t); +CTL_RO_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t); #endif /******************************************************************************/ @@ -1137,8 +1206,15 @@ CTL_RO_FALSE_GEN(config_xmalloc) /******************************************************************************/ CTL_RO_GEN(opt_abort, opt_abort, bool) +CTL_RO_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t) +CTL_RO_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t) +CTL_RO_GEN(opt_lg_chunk, opt_lg_chunk, size_t) +CTL_RO_GEN(opt_narenas, opt_narenas, size_t) +CTL_RO_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) +CTL_RO_GEN(opt_stats_print, opt_stats_print, bool) #ifdef JEMALLOC_FILL CTL_RO_GEN(opt_junk, opt_junk, bool) +CTL_RO_GEN(opt_zero, opt_zero, bool) #endif #ifdef JEMALLOC_SYSV CTL_RO_GEN(opt_sysv, opt_sysv, bool) @@ -1146,27 +1222,22 @@ CTL_RO_GEN(opt_sysv, opt_sysv, bool) #ifdef JEMALLOC_XMALLOC CTL_RO_GEN(opt_xmalloc, opt_xmalloc, bool) #endif -#ifdef JEMALLOC_ZERO -CTL_RO_GEN(opt_zero, opt_zero, bool) -#endif #ifdef JEMALLOC_TCACHE CTL_RO_GEN(opt_tcache, opt_tcache, bool) CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t) #endif #ifdef JEMALLOC_PROF CTL_RO_GEN(opt_prof, opt_prof, bool) +CTL_RO_GEN(opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_GEN(opt_prof_active, opt_prof_active, bool) CTL_RO_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t) CTL_RO_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t) CTL_RO_GEN(opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) -CTL_RO_GEN(opt_prof_udump, opt_prof_udump, bool) +CTL_RO_GEN(opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_GEN(opt_prof_leak, opt_prof_leak, bool) +CTL_RO_GEN(opt_prof_accum, opt_prof_accum, bool) +CTL_RO_GEN(opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t) #endif -CTL_RO_GEN(opt_stats_print, opt_stats_print, bool) -CTL_RO_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t) -CTL_RO_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t) -CTL_RO_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) -CTL_RO_GEN(opt_lg_chunk, opt_lg_chunk, size_t) #ifdef JEMALLOC_SWAP CTL_RO_GEN(opt_overcommit, opt_overcommit, bool) #endif @@ -1249,6 +1320,44 @@ CTL_RO_GEN(arenas_nhbins, nhbins, unsigned) #endif CTL_RO_GEN(arenas_nlruns, nlclasses, size_t) +static int +arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + unsigned arena; + + WRITEONLY(); + arena = UINT_MAX; + WRITE(arena, unsigned); + if (newp != NULL && arena >= narenas) { + ret = EFAULT; + goto RETURN; + } else { + arena_t *tarenas[narenas]; + + malloc_mutex_lock(&arenas_lock); + memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); + malloc_mutex_unlock(&arenas_lock); + + if (arena == UINT_MAX) { + unsigned i; + for (i = 0; i < narenas; i++) { + if (tarenas[i] != NULL) + arena_purge_all(tarenas[i]); + } + } else { + assert(arena < narenas); + if (tarenas[arena] != NULL) + arena_purge_all(tarenas[arena]); + } + } + + ret = 0; +RETURN: + return (ret); +} + /******************************************************************************/ #ifdef JEMALLOC_PROF diff --git a/jemalloc/src/huge.c b/jemalloc/src/huge.c index 49962ea..a035197 100644 --- a/jemalloc/src/huge.c +++ b/jemalloc/src/huge.c @@ -37,7 +37,7 @@ huge_malloc(size_t size, bool zero) if (node == NULL) return (NULL); - ret = chunk_alloc(csize, &zero); + ret = chunk_alloc(csize, false, &zero); if (ret == NULL) { 
base_node_dealloc(node); return (NULL); @@ -69,12 +69,11 @@ huge_malloc(size_t size, bool zero) /* Only handles large allocations that require more than chunk alignment. */ void * -huge_palloc(size_t alignment, size_t size) +huge_palloc(size_t size, size_t alignment, bool zero) { void *ret; size_t alloc_size, chunk_size, offset; extent_node_t *node; - bool zero; /* * This allocation requires alignment that is even larger than chunk @@ -98,8 +97,7 @@ huge_palloc(size_t alignment, size_t size) if (node == NULL) return (NULL); - zero = false; - ret = chunk_alloc(alloc_size, &zero); + ret = chunk_alloc(alloc_size, false, &zero); if (ret == NULL) { base_node_dealloc(node); return (NULL); @@ -142,45 +140,80 @@ huge_palloc(size_t alignment, size_t size) malloc_mutex_unlock(&huge_mtx); #ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, chunk_size); - else if (opt_zero) - memset(ret, 0, chunk_size); + if (zero == false) { + if (opt_junk) + memset(ret, 0xa5, chunk_size); + else if (opt_zero) + memset(ret, 0, chunk_size); + } #endif return (ret); } void * -huge_ralloc(void *ptr, size_t size, size_t oldsize) +huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) { - void *ret; - size_t copysize; - /* Avoid moving the allocation if the size class would not change. */ - if (oldsize > arena_maxclass && - CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) { + /* + * Avoid moving the allocation if the size class can be left the same. + */ + if (oldsize > arena_maxclass + && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) + && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { + assert(CHUNK_CEILING(oldsize) == oldsize); #ifdef JEMALLOC_FILL if (opt_junk && size < oldsize) { - memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - - size); - } else if (opt_zero && size > oldsize) { - memset((void *)((uintptr_t)ptr + oldsize), 0, size - - oldsize); + memset((void *)((uintptr_t)ptr + size), 0x5a, + oldsize - size); } #endif return (ptr); } + /* Reallocation would require a move. */ + return (NULL); +} + +void * +huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) +{ + void *ret; + size_t copysize; + + /* Try to avoid moving the allocation. */ + ret = huge_ralloc_no_move(ptr, oldsize, size, extra); + if (ret != NULL) + return (ret); + /* - * If we get here, then size and oldsize are different enough that we - * need to use a different size class. In that case, fall back to - * allocating new space and copying. + * size and oldsize are different enough that we need to use a + * different size class. In that case, fall back to allocating new + * space and copying. */ - ret = huge_malloc(size, false); - if (ret == NULL) - return (NULL); + if (alignment != 0) + ret = huge_palloc(size + extra, alignment, zero); + else + ret = huge_malloc(size + extra, zero); + + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, this time without extra. */ + if (alignment != 0) + ret = huge_palloc(size, alignment, zero); + else + ret = huge_malloc(size, zero); + + if (ret == NULL) + return (NULL); + } + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ copysize = (size < oldsize) ? 
size : oldsize; memcpy(ret, ptr, copysize); idalloc(ptr); diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c index b36590d..2aebc51 100644 --- a/jemalloc/src/jemalloc.c +++ b/jemalloc/src/jemalloc.c @@ -1,85 +1,3 @@ -/*- - * This allocator implementation is designed to provide scalable performance - * for multi-threaded programs on multi-processor systems. The following - * features are included for this purpose: - * - * + Multiple arenas are used if there are multiple CPUs, which reduces lock - * contention and cache sloshing. - * - * + Thread-specific caching is used if there are multiple threads, which - * reduces the amount of locking. - * - * + Cache line sharing between arenas is avoided for internal data - * structures. - * - * + Memory is managed in chunks and runs (chunks can be split into runs), - * rather than as individual pages. This provides a constant-time - * mechanism for associating allocations with particular arenas. - * - * Allocation requests are rounded up to the nearest size class, and no record - * of the original request size is maintained. Allocations are broken into - * categories according to size class. Assuming 1 MiB chunks, 4 KiB pages and - * a 16 byte quantum on a 32-bit system, the size classes in each category are - * as follows: - * - * |========================================| - * | Category | Subcategory | Size | - * |========================================| - * | Small | Tiny | 2 | - * | | | 4 | - * | | | 8 | - * | |------------------+----------| - * | | Quantum-spaced | 16 | - * | | | 32 | - * | | | 48 | - * | | | ... | - * | | | 96 | - * | | | 112 | - * | | | 128 | - * | |------------------+----------| - * | | Cacheline-spaced | 192 | - * | | | 256 | - * | | | 320 | - * | | | 384 | - * | | | 448 | - * | | | 512 | - * | |------------------+----------| - * | | Sub-page | 760 | - * | | | 1024 | - * | | | 1280 | - * | | | ... | - * | | | 3328 | - * | | | 3584 | - * | | | 3840 | - * |========================================| - * | Large | 4 KiB | - * | | 8 KiB | - * | | 12 KiB | - * | | ... | - * | | 1012 KiB | - * | | 1016 KiB | - * | | 1020 KiB | - * |========================================| - * | Huge | 1 MiB | - * | | 2 MiB | - * | | 3 MiB | - * | | ... | - * |========================================| - * - * Different mechanisms are used accoding to category: - * - * Small: Each size class is segregated into its own set of runs. Each run - * maintains a bitmap of which regions are free/allocated. - * - * Large : Each allocation is backed by a dedicated run. Metadata are stored - * in the associated arena chunk header maps. - * - * Huge : Each allocation is backed by a dedicated contiguous set of chunks. - * Metadata are stored in a separate red-black tree. - * - ******************************************************************************* - */ - #define JEMALLOC_C_ #include "jemalloc/internal/jemalloc_internal.h" @@ -89,22 +7,30 @@ malloc_mutex_t arenas_lock; arena_t **arenas; unsigned narenas; -#ifndef NO_TLS static unsigned next_arena; -#endif #ifndef NO_TLS -__thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec")); +__thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); +#else +pthread_key_t arenas_tsd; +#endif + +#ifdef JEMALLOC_STATS +# ifndef NO_TLS +__thread thread_allocated_t thread_allocated_tls; +# else +pthread_key_t thread_allocated_tsd; +# endif #endif /* Set to true once the allocator has been initialized. 
*/ -static bool malloc_initialized = false; +static bool malloc_initialized = false; /* Used to let the initializing thread recursively allocate. */ -static pthread_t malloc_initializer = (unsigned long)0; +static pthread_t malloc_initializer = (unsigned long)0; /* Used to avoid initialization races. */ -static malloc_mutex_t init_lock = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP; +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #ifdef DYNAMIC_PAGE_SHIFT size_t pagesize; @@ -115,8 +41,7 @@ size_t lg_pagesize; unsigned ncpus; /* Runtime configuration options. */ -const char *JEMALLOC_P(malloc_options) - JEMALLOC_ATTR(visibility("default")); +const char *JEMALLOC_P(malloc_conf) JEMALLOC_ATTR(visibility("default")); #ifdef JEMALLOC_DEBUG bool opt_abort = true; # ifdef JEMALLOC_FILL @@ -137,7 +62,7 @@ bool opt_xmalloc = false; #ifdef JEMALLOC_FILL bool opt_zero = false; #endif -static int opt_narenas_lshift = 0; +size_t opt_narenas = 0; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -145,9 +70,15 @@ static int opt_narenas_lshift = 0; static void wrtmessage(void *cbopaque, const char *s); static void stats_print_atexit(void); static unsigned malloc_ncpus(void); +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +static void thread_allocated_cleanup(void *arg); +#endif +static bool malloc_conf_next(char const **opts_p, char const **k_p, + size_t *klen_p, char const **v_p, size_t *vlen_p); +static void malloc_conf_error(const char *msg, const char *k, size_t klen, + const char *v, size_t vlen); +static void malloc_conf_init(void); static bool malloc_init_hard(void); -static void jemalloc_prefork(void); -static void jemalloc_postfork(void); /******************************************************************************/ /* malloc_message() setup. */ @@ -160,8 +91,14 @@ static void wrtmessage(void *cbopaque, const char *s) { - - write(STDERR_FILENO, s, strlen(s)); +#ifdef JEMALLOC_CC_SILENCE + int result = +#endif + write(STDERR_FILENO, s, strlen(s)); +#ifdef JEMALLOC_CC_SILENCE + if (result < 0) + result = errno; +#endif } void (*JEMALLOC_P(malloc_message))(void *, const char *s) @@ -179,8 +116,8 @@ arenas_extend(unsigned ind) arena_t *ret; /* Allocate enough space for trailing bins. */ - ret = (arena_t *)base_alloc(sizeof(arena_t) - + (sizeof(arena_bin_t) * (nbins - 1))); + ret = (arena_t *)base_alloc(offsetof(arena_t, bins) + + (sizeof(arena_bin_t) * nbins)); if (ret != NULL && arena_new(ret, ind) == false) { arenas[ind] = ret; return (ret); @@ -200,7 +137,6 @@ arenas_extend(unsigned ind) return (arenas[0]); } -#ifndef NO_TLS /* * Choose an arena based on a per-thread value (slow-path code only, called * only by choose_arena()). @@ -219,11 +155,29 @@ choose_arena_hard(void) } else ret = arenas[0]; - arenas_map = ret; + ARENA_SET(ret); return (ret); } + +/* + * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so + * provide a wrapper. 
+ */ +int +buferror(int errnum, char *buf, size_t buflen) +{ +#ifdef _GNU_SOURCE + char *b = strerror_r(errno, buf, buflen); + if (b != buf) { + strncpy(buf, b, buflen); + buf[buflen-1] = '\0'; + } + return (0); +#else + return (strerror_r(errno, buf, buflen)); #endif +} static void stats_print_atexit(void) @@ -283,6 +237,17 @@ malloc_ncpus(void) return (ret); } +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +static void +thread_allocated_cleanup(void *arg) +{ + uint64_t *allocated = (uint64_t *)arg; + + if (allocated != NULL) + idalloc(allocated); +} +#endif + /* * FreeBSD's pthreads implementation calls malloc(3), so the malloc * implementation has to take pains to avoid infinite recursion during @@ -299,100 +264,173 @@ malloc_init(void) } static bool -malloc_init_hard(void) +malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, + char const **v_p, size_t *vlen_p) { - unsigned i; - int linklen; - char buf[PATH_MAX + 1]; - const char *opts; - arena_t *init_arenas[1]; - - malloc_mutex_lock(&init_lock); - if (malloc_initialized || malloc_initializer == pthread_self()) { - /* - * Another thread initialized the allocator before this one - * acquired init_lock, or this thread is the initializing - * thread, and it is recursively allocating. - */ - malloc_mutex_unlock(&init_lock); - return (false); + bool accept; + const char *opts = *opts_p; + + *k_p = opts; + + for (accept = false; accept == false;) { + switch (*opts) { + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '_': + opts++; + break; + case ':': + opts++; + *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; + *v_p = opts; + accept = true; + break; + case '\0': + if (opts != *opts_p) { + malloc_write("<jemalloc>: Conf string " + "ends with key\n"); + } + return (true); + default: + malloc_write("<jemalloc>: Malformed conf " + "string\n"); + return (true); + } } - if (malloc_initializer != (unsigned long)0) { - /* Busy-wait until the initializing thread completes. */ - do { - malloc_mutex_unlock(&init_lock); - CPU_SPINWAIT; - malloc_mutex_lock(&init_lock); - } while (malloc_initialized == false); - malloc_mutex_unlock(&init_lock); - return (false); + + for (accept = false; accept == false;) { + switch (*opts) { + case ',': + opts++; + /* + * Look ahead one character here, because the + * next time this function is called, it will + * assume that end of input has been cleanly + * reached if no input remains, but we have + * optimistically already consumed the comma if + * one exists. + */ + if (*opts == '\0') { + malloc_write("<jemalloc>: Conf string " + "ends with comma\n"); + } + *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; + accept = true; + break; + case '\0': + *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; + accept = true; + break; + default: + opts++; + break; + } } -#ifdef DYNAMIC_PAGE_SHIFT - /* Get page size. 
*/ - { - long result; + *opts_p = opts; + return (false); +} - result = sysconf(_SC_PAGESIZE); - assert(result != -1); - pagesize = (unsigned)result; +static void +malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, + size_t vlen) +{ + char buf[PATH_MAX + 1]; - /* - * We assume that pagesize is a power of 2 when calculating - * pagesize_mask and lg_pagesize. - */ - assert(((result - 1) & result) == 0); - pagesize_mask = result - 1; - lg_pagesize = ffs((int)result) - 1; - } -#endif + malloc_write("<jemalloc>: "); + malloc_write(msg); + malloc_write(": "); + memcpy(buf, k, klen); + memcpy(&buf[klen], ":", 1); + memcpy(&buf[klen+1], v, vlen); + buf[klen+1+vlen] = '\0'; + malloc_write(buf); + malloc_write("\n"); +} - for (i = 0; i < 3; i++) { - unsigned j; +static void +malloc_conf_init(void) +{ + unsigned i; + char buf[PATH_MAX + 1]; + const char *opts, *k, *v; + size_t klen, vlen; + for (i = 0; i < 3; i++) { /* Get runtime configuration. */ switch (i) { case 0: - if ((linklen = readlink("/etc/jemalloc.conf", buf, - sizeof(buf) - 1)) != -1) { + if (JEMALLOC_P(malloc_conf) != NULL) { /* - * Use the contents of the "/etc/jemalloc.conf" - * symbolic link's name. + * Use options that were compiled into the + * program. */ - buf[linklen] = '\0'; - opts = buf; + opts = JEMALLOC_P(malloc_conf); } else { /* No configuration specified. */ buf[0] = '\0'; opts = buf; } break; - case 1: - if ((opts = getenv("JEMALLOC_OPTIONS")) != NULL) { + case 1: { + int linklen; + const char *linkname = +#ifdef JEMALLOC_PREFIX + "/etc/"JEMALLOC_PREFIX"malloc.conf" +#else + "/etc/malloc.conf" +#endif + ; + + if ((linklen = readlink(linkname, buf, + sizeof(buf) - 1)) != -1) { /* - * Do nothing; opts is already initialized to - * the value of the JEMALLOC_OPTIONS - * environment variable. + * Use the contents of the "/etc/malloc.conf" + * symbolic link's name. */ + buf[linklen] = '\0'; + opts = buf; } else { /* No configuration specified. */ buf[0] = '\0'; opts = buf; } break; - case 2: - if (JEMALLOC_P(malloc_options) != NULL) { + } + case 2: { + const char *envname = +#ifdef JEMALLOC_PREFIX + JEMALLOC_CPREFIX"MALLOC_CONF" +#else + "MALLOC_CONF" +#endif + ; + + if ((opts = getenv(envname)) != NULL) { /* - * Use options that were compiled into the - * program. + * Do nothing; opts is already initialized to + * the value of the JEMALLOC_OPTIONS + * environment variable. */ - opts = JEMALLOC_P(malloc_options); } else { /* No configuration specified. */ buf[0] = '\0'; opts = buf; } break; + } default: /* NOTREACHED */ assert(false); @@ -400,234 +438,196 @@ malloc_init_hard(void) opts = buf; } - for (j = 0; opts[j] != '\0'; j++) { - unsigned k, nreps; - bool nseen; - - /* Parse repetition count, if any. 
*/ - for (nreps = 0, nseen = false;; j++, nseen = true) { - switch (opts[j]) { - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - case '8': case '9': - nreps *= 10; - nreps += opts[j] - '0'; - break; - default: - goto MALLOC_OUT; - } + while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, + &vlen) == false) { +#define CONF_HANDLE_BOOL(n) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + if (strncmp("true", v, vlen) == 0 && \ + vlen == sizeof("true")-1) \ + opt_##n = true; \ + else if (strncmp("false", v, vlen) == \ + 0 && vlen == sizeof("false")-1) \ + opt_##n = false; \ + else { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } \ + continue; \ } -MALLOC_OUT: - if (nseen == false) - nreps = 1; - - for (k = 0; k < nreps; k++) { - switch (opts[j]) { - case 'a': - opt_abort = false; - break; - case 'A': - opt_abort = true; - break; -#ifdef JEMALLOC_PROF - case 'b': - if (opt_lg_prof_bt_max > 0) - opt_lg_prof_bt_max--; - break; - case 'B': - if (opt_lg_prof_bt_max < LG_PROF_BT_MAX) - opt_lg_prof_bt_max++; - break; -#endif - case 'c': - if (opt_lg_cspace_max - 1 > - opt_lg_qspace_max && - opt_lg_cspace_max > - LG_CACHELINE) - opt_lg_cspace_max--; - break; - case 'C': - if (opt_lg_cspace_max < PAGE_SHIFT - - 1) - opt_lg_cspace_max++; - break; - case 'd': - if (opt_lg_dirty_mult + 1 < - (sizeof(size_t) << 3)) - opt_lg_dirty_mult++; - break; - case 'D': - if (opt_lg_dirty_mult >= 0) - opt_lg_dirty_mult--; - break; -#ifdef JEMALLOC_PROF - case 'e': - opt_prof_active = false; - break; - case 'E': - opt_prof_active = true; - break; - case 'f': - opt_prof = false; - break; - case 'F': - opt_prof = true; - break; -#endif -#ifdef JEMALLOC_TCACHE - case 'g': - if (opt_lg_tcache_gc_sweep >= 0) - opt_lg_tcache_gc_sweep--; - break; - case 'G': - if (opt_lg_tcache_gc_sweep + 1 < - (sizeof(size_t) << 3)) - opt_lg_tcache_gc_sweep++; - break; - case 'h': - opt_tcache = false; - break; - case 'H': - opt_tcache = true; - break; -#endif -#ifdef JEMALLOC_PROF - case 'i': - if (opt_lg_prof_interval >= 0) - opt_lg_prof_interval--; - break; - case 'I': - if (opt_lg_prof_interval + 1 < - (sizeof(uint64_t) << 3)) - opt_lg_prof_interval++; - break; -#endif +#define CONF_HANDLE_SIZE_T(n, min, max) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + unsigned long ul; \ + char *end; \ + \ + errno = 0; \ + ul = strtoul(v, &end, 0); \ + if (errno != 0 || (uintptr_t)end - \ + (uintptr_t)v != vlen) { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } else if (ul < min || ul > max) { \ + malloc_conf_error( \ + "Out-of-range conf value", \ + k, klen, v, vlen); \ + } else \ + opt_##n = ul; \ + continue; \ + } +#define CONF_HANDLE_SSIZE_T(n, min, max) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + long l; \ + char *end; \ + \ + errno = 0; \ + l = strtol(v, &end, 0); \ + if (errno != 0 || (uintptr_t)end - \ + (uintptr_t)v != vlen) { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } else if (l < (ssize_t)min || l > \ + (ssize_t)max) { \ + malloc_conf_error( \ + "Out-of-range conf value", \ + k, klen, v, vlen); \ + } else \ + opt_##n = l; \ + continue; \ + } +#define CONF_HANDLE_CHAR_P(n, d) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + size_t cpylen = (vlen <= \ + sizeof(opt_##n)-1) ? 
vlen : \ + sizeof(opt_##n)-1; \ + strncpy(opt_##n, v, cpylen); \ + opt_##n[cpylen] = '\0'; \ + continue; \ + } + + CONF_HANDLE_BOOL(abort) + CONF_HANDLE_SIZE_T(lg_qspace_max, LG_QUANTUM, + PAGE_SHIFT-1) + CONF_HANDLE_SIZE_T(lg_cspace_max, LG_QUANTUM, + PAGE_SHIFT-1) + /* + * Chunks always require at least one * header page, + * plus one data page. + */ + CONF_HANDLE_SIZE_T(lg_chunk, PAGE_SHIFT+1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SIZE_T(narenas, 1, SIZE_T_MAX) + CONF_HANDLE_SSIZE_T(lg_dirty_mult, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_BOOL(stats_print) #ifdef JEMALLOC_FILL - case 'j': - opt_junk = false; - break; - case 'J': - opt_junk = true; - break; -#endif - case 'k': - /* - * Chunks always require at least one - * header page, plus one data page. - */ - if ((1U << (opt_lg_chunk - 1)) >= - (2U << PAGE_SHIFT)) - opt_lg_chunk--; - break; - case 'K': - if (opt_lg_chunk + 1 < - (sizeof(size_t) << 3)) - opt_lg_chunk++; - break; -#ifdef JEMALLOC_PROF - case 'l': - opt_prof_leak = false; - break; - case 'L': - opt_prof_leak = true; - break; -#endif -#ifdef JEMALLOC_TCACHE - case 'm': - if (opt_lg_tcache_maxclass >= 0) - opt_lg_tcache_maxclass--; - break; - case 'M': - if (opt_lg_tcache_maxclass + 1 < - (sizeof(size_t) << 3)) - opt_lg_tcache_maxclass++; - break; -#endif - case 'n': - opt_narenas_lshift--; - break; - case 'N': - opt_narenas_lshift++; - break; -#ifdef JEMALLOC_SWAP - case 'o': - opt_overcommit = false; - break; - case 'O': - opt_overcommit = true; - break; -#endif - case 'p': - opt_stats_print = false; - break; - case 'P': - opt_stats_print = true; - break; - case 'q': - if (opt_lg_qspace_max > LG_QUANTUM) - opt_lg_qspace_max--; - break; - case 'Q': - if (opt_lg_qspace_max + 1 < - opt_lg_cspace_max) - opt_lg_qspace_max++; - break; -#ifdef JEMALLOC_PROF - case 's': - if (opt_lg_prof_sample > 0) - opt_lg_prof_sample--; - break; - case 'S': - if (opt_lg_prof_sample + 1 < - (sizeof(uint64_t) << 3)) - opt_lg_prof_sample++; - break; - case 'u': - opt_prof_udump = false; - break; - case 'U': - opt_prof_udump = true; - break; + CONF_HANDLE_BOOL(junk) + CONF_HANDLE_BOOL(zero) #endif #ifdef JEMALLOC_SYSV - case 'v': - opt_sysv = false; - break; - case 'V': - opt_sysv = true; - break; + CONF_HANDLE_BOOL(sysv) #endif #ifdef JEMALLOC_XMALLOC - case 'x': - opt_xmalloc = false; - break; - case 'X': - opt_xmalloc = true; - break; + CONF_HANDLE_BOOL(xmalloc) #endif -#ifdef JEMALLOC_FILL - case 'z': - opt_zero = false; - break; - case 'Z': - opt_zero = true; - break; -#endif - default: { - char cbuf[2]; - - cbuf[0] = opts[j]; - cbuf[1] = '\0'; - malloc_write( - "<jemalloc>: Unsupported character " - "in malloc options: '"); - malloc_write(cbuf); - malloc_write("'\n"); - } - } - } +#ifdef JEMALLOC_TCACHE + CONF_HANDLE_BOOL(tcache) + CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(lg_tcache_max, -1, + (sizeof(size_t) << 3) - 1) +#endif +#ifdef JEMALLOC_PROF + CONF_HANDLE_BOOL(prof) + CONF_HANDLE_CHAR_P(prof_prefix, "jeprof") + CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, LG_PROF_BT_MAX) + CONF_HANDLE_BOOL(prof_active) + CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(prof_accum) + CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(lg_prof_interval, -1, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(prof_gdump) + CONF_HANDLE_BOOL(prof_leak) +#endif +#ifdef JEMALLOC_SWAP + CONF_HANDLE_BOOL(overcommit) +#endif + malloc_conf_error("Invalid conf pair", k, klen, v, + 
vlen); +#undef CONF_HANDLE_BOOL +#undef CONF_HANDLE_SIZE_T +#undef CONF_HANDLE_SSIZE_T +#undef CONF_HANDLE_CHAR_P + } + + /* Validate configuration of options that are inter-related. */ + if (opt_lg_qspace_max+1 >= opt_lg_cspace_max) { + malloc_write("<jemalloc>: Invalid lg_[qc]space_max " + "relationship; restoring defaults\n"); + opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; + opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; } } +} + +static bool +malloc_init_hard(void) +{ + arena_t *init_arenas[1]; + + malloc_mutex_lock(&init_lock); + if (malloc_initialized || malloc_initializer == pthread_self()) { + /* + * Another thread initialized the allocator before this one + * acquired init_lock, or this thread is the initializing + * thread, and it is recursively allocating. + */ + malloc_mutex_unlock(&init_lock); + return (false); + } + if (malloc_initializer != (unsigned long)0) { + /* Busy-wait until the initializing thread completes. */ + do { + malloc_mutex_unlock(&init_lock); + CPU_SPINWAIT; + malloc_mutex_lock(&init_lock); + } while (malloc_initialized == false); + malloc_mutex_unlock(&init_lock); + return (false); + } + +#ifdef DYNAMIC_PAGE_SHIFT + /* Get page size. */ + { + long result; + + result = sysconf(_SC_PAGESIZE); + assert(result != -1); + pagesize = (unsigned)result; + + /* + * We assume that pagesize is a power of 2 when calculating + * pagesize_mask and lg_pagesize. + */ + assert(((result - 1) & result) == 0); + pagesize_mask = result - 1; + lg_pagesize = ffs((int)result) - 1; + } +#endif + +#ifdef JEMALLOC_PROF + prof_boot0(); +#endif + + malloc_conf_init(); /* Register fork handlers. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork, @@ -662,7 +662,7 @@ MALLOC_OUT: } #ifdef JEMALLOC_PROF - prof_boot0(); + prof_boot1(); #endif if (arena_boot()) { @@ -679,6 +679,15 @@ MALLOC_OUT: return (true); } +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) + /* Initialize allocation counters before any allocations can occur. */ + if (pthread_key_create(&thread_allocated_tsd, thread_allocated_cleanup) + != 0) { + malloc_mutex_unlock(&init_lock); + return (true); + } +#endif + /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). @@ -697,19 +706,17 @@ MALLOC_OUT: return (true); } -#ifndef NO_TLS /* * Assign the initial arena to the initial thread, in order to avoid * spurious creation of an extra arena if the application switches to * threaded mode. */ - arenas_map = arenas[0]; -#endif + ARENA_SET(arenas[0]); malloc_mutex_init(&arenas_lock); #ifdef JEMALLOC_PROF - if (prof_boot1()) { + if (prof_boot2()) { malloc_mutex_unlock(&init_lock); return (true); } @@ -721,64 +728,40 @@ MALLOC_OUT: ncpus = malloc_ncpus(); malloc_mutex_lock(&init_lock); - if (ncpus > 1) { + if (opt_narenas == 0) { /* * For SMP systems, create more than one arena per CPU by * default. */ - opt_narenas_lshift += 2; + if (ncpus > 1) + opt_narenas = ncpus << 2; + else + opt_narenas = 1; } + narenas = opt_narenas; + /* + * Make sure that the arenas array can be allocated. In practice, this + * limit is enough to allow the allocator to function, but the ctl + * machinery will fail to allocate memory at far lower limits. + */ + if (narenas > chunksize / sizeof(arena_t *)) { + char buf[UMAX2S_BUFSIZE]; - /* Determine how many arenas to use. */ - narenas = ncpus; - if (opt_narenas_lshift > 0) { - if ((narenas << opt_narenas_lshift) > narenas) - narenas <<= opt_narenas_lshift; - /* - * Make sure not to exceed the limits of what base_alloc() can - * handle. 
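/*
 * [Editor's aside: illustrative note, not part of this commit.]  The
 * malloc_conf_next()/CONF_HANDLE_* machinery above replaces the old
 * single-letter MALLOC_OPTIONS flags (deleted in this hunk) with named
 * "key:value" pairs, comma-separated.  Keys are [A-Za-z0-9_]+, booleans
 * take the literal words true/false, and numeric options are parsed with
 * strtoul()/strtol() and range-checked.  The same string may come from
 * the malloc_conf symbol compiled into the program, the name of an
 * /etc/malloc.conf symbolic link, or the MALLOC_CONF environment variable
 * (each gaining the configured prefix, if any), e.g.:
 *
 *   MALLOC_CONF="lg_chunk:22,narenas:4,lg_dirty_mult:3,stats_print:true"
 *
 * Option names shown are among those handled above; availability of some
 * (e.g. prof_*, tcache, overcommit) depends on configure-time features.
 */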
- */ - if (narenas * sizeof(arena_t *) > chunksize) - narenas = chunksize / sizeof(arena_t *); - } else if (opt_narenas_lshift < 0) { - if ((narenas >> -opt_narenas_lshift) < narenas) - narenas >>= -opt_narenas_lshift; - /* Make sure there is at least one arena. */ - if (narenas == 0) - narenas = 1; + narenas = chunksize / sizeof(arena_t *); + malloc_write("<jemalloc>: Reducing narenas to limit ("); + malloc_write(u2s(narenas, 10, buf)); + malloc_write(")\n"); } -#ifdef NO_TLS - if (narenas > 1) { - static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19, - 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, - 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, - 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, - 223, 227, 229, 233, 239, 241, 251, 257, 263}; - unsigned nprimes, parenas; + next_arena = (narenas > 0) ? 1 : 0; - /* - * Pick a prime number of hash arenas that is more than narenas - * so that direct hashing of pthread_self() pointers tends to - * spread allocations evenly among the arenas. - */ - assert((narenas & 1) == 0); /* narenas must be even. */ - nprimes = (sizeof(primes) >> LG_SIZEOF_INT); - parenas = primes[nprimes - 1]; /* In case not enough primes. */ - for (i = 1; i < nprimes; i++) { - if (primes[i] > narenas) { - parenas = primes[i]; - break; - } - } - narenas = parenas; +#ifdef NO_TLS + if (pthread_key_create(&arenas_tsd, NULL) != 0) { + malloc_mutex_unlock(&init_lock); + return (true); } #endif -#ifndef NO_TLS - next_arena = (narenas > 0) ? 1 : 0; -#endif - /* Allocate and initialize arenas. */ arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); if (arenas == NULL) { @@ -793,11 +776,35 @@ MALLOC_OUT: /* Copy the pointer to the one arena that was already initialized. */ arenas[0] = init_arenas[0]; +#ifdef JEMALLOC_ZONE + /* Register the custom zone. */ + malloc_zone_register(create_zone()); + + /* + * Convert the default szone to an "overlay zone" that is capable of + * deallocating szone-allocated objects, but allocating new objects + * from jemalloc. + */ + szone2ozone(malloc_default_zone()); +#endif + malloc_initialized = true; malloc_mutex_unlock(&init_lock); return (false); } + +#ifdef JEMALLOC_ZONE +JEMALLOC_ATTR(constructor) +void +jemalloc_darwin_init(void) +{ + + if (malloc_init_hard()) + abort(); +} +#endif + /* * End initialization functions. 
*/ @@ -812,8 +819,19 @@ void * JEMALLOC_P(malloc)(size_t size) { void *ret; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; +#endif #ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt; + prof_thr_cnt_t *cnt +# ifdef JEMALLOC_CC_SILENCE + = NULL +# endif + ; #endif if (malloc_init()) { @@ -843,20 +861,26 @@ JEMALLOC_P(malloc)(size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { - if ((cnt = prof_alloc_prep(size)) == NULL) { + usize = s2u(size); + if ((cnt = prof_alloc_prep(usize)) == NULL) { ret = NULL; goto OOM; } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <= + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { ret = imalloc(small_maxclass+1); if (ret != NULL) - arena_prof_promoted(ret, size); + arena_prof_promoted(ret, usize); } else ret = imalloc(size); } else #endif + { +#ifdef JEMALLOC_STATS + usize = s2u(size); +#endif ret = imalloc(size); + } OOM: if (ret == NULL) { @@ -875,7 +899,13 @@ RETURN: #endif #ifdef JEMALLOC_PROF if (opt_prof && ret != NULL) - prof_malloc(ret, cnt); + prof_malloc(ret, usize, cnt); +#endif +#ifdef JEMALLOC_STATS + if (ret != NULL) { + assert(usize == isalloc(ret)); + ALLOCATED_ADD(usize, 0); + } #endif return (ret); } @@ -887,8 +917,19 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) { int ret; void *result; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; +#endif #ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt; + prof_thr_cnt_t *cnt +# ifdef JEMALLOC_CC_SILENCE + = NULL +# endif + ; #endif if (malloc_init()) @@ -934,24 +975,32 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { - if ((cnt = prof_alloc_prep(size)) == NULL) { + usize = sa2u(size, alignment, NULL); + if ((cnt = prof_alloc_prep(usize)) == NULL) { result = NULL; ret = EINVAL; } else { if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && size <= small_maxclass) { - result = ipalloc(alignment, - small_maxclass+1); + (uintptr_t)1U && usize <= small_maxclass) { + result = ipalloc(small_maxclass+1, + alignment, false); if (result != NULL) { arena_prof_promoted(result, - size); + usize); } - } else - result = ipalloc(alignment, size); + } else { + result = ipalloc(size, alignment, + false); + } } } else #endif - result = ipalloc(alignment, size); + { +#ifdef JEMALLOC_STATS + usize = sa2u(size, alignment, NULL); +#endif + result = ipalloc(size, alignment, false); + } } if (result == NULL) { @@ -970,9 +1019,15 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) ret = 0; RETURN: +#ifdef JEMALLOC_STATS + if (result != NULL) { + assert(usize == isalloc(result)); + ALLOCATED_ADD(usize, 0); + } +#endif #ifdef JEMALLOC_PROF if (opt_prof && result != NULL) - prof_malloc(result, cnt); + prof_malloc(result, usize, cnt); #endif return (ret); } @@ -984,8 +1039,19 @@ JEMALLOC_P(calloc)(size_t num, size_t size) { void *ret; size_t num_size; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; +#endif #ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt; + prof_thr_cnt_t *cnt +# ifdef JEMALLOC_CC_SILENCE + = NULL +# endif + ; #endif if (malloc_init()) { @@ -1020,20 +1086,26 @@ JEMALLOC_P(calloc)(size_t num, size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { - if ((cnt = prof_alloc_prep(num_size)) == NULL) { + usize = s2u(num_size); + if ((cnt = prof_alloc_prep(usize)) == NULL) 
{ ret = NULL; goto RETURN; } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && num_size + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { ret = icalloc(small_maxclass+1); if (ret != NULL) - arena_prof_promoted(ret, num_size); + arena_prof_promoted(ret, usize); } else ret = icalloc(num_size); } else #endif + { +#ifdef JEMALLOC_STATS + usize = s2u(num_size); +#endif ret = icalloc(num_size); + } RETURN: if (ret == NULL) { @@ -1049,7 +1121,13 @@ RETURN: #ifdef JEMALLOC_PROF if (opt_prof && ret != NULL) - prof_malloc(ret, cnt); + prof_malloc(ret, usize, cnt); +#endif +#ifdef JEMALLOC_STATS + if (ret != NULL) { + assert(usize == isalloc(ret)); + ALLOCATED_ADD(usize, 0); + } #endif return (ret); } @@ -1059,10 +1137,25 @@ void * JEMALLOC_P(realloc)(void *ptr, size_t size) { void *ret; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; + size_t old_size = 0; +#endif #ifdef JEMALLOC_PROF - size_t old_size; - prof_thr_cnt_t *cnt; - prof_ctx_t *old_ctx; + prof_thr_cnt_t *cnt +# ifdef JEMALLOC_CC_SILENCE + = NULL +# endif + ; + prof_ctx_t *old_ctx +# ifdef JEMALLOC_CC_SILENCE + = NULL +# endif + ; #endif if (size == 0) { @@ -1073,9 +1166,11 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) #ifdef JEMALLOC_SYSV else { if (ptr != NULL) { +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + old_size = isalloc(ptr); +#endif #ifdef JEMALLOC_PROF if (opt_prof) { - old_size = isalloc(ptr); old_ctx = prof_ctx_get(ptr); cnt = NULL; } @@ -1084,7 +1179,6 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) } #ifdef JEMALLOC_PROF else if (opt_prof) { - old_size = 0; old_ctx = NULL; cnt = NULL; } @@ -1099,24 +1193,33 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) assert(malloc_initialized || malloc_initializer == pthread_self()); +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + old_size = isalloc(ptr); +#endif #ifdef JEMALLOC_PROF if (opt_prof) { - old_size = isalloc(ptr); + usize = s2u(size); old_ctx = prof_ctx_get(ptr); - if ((cnt = prof_alloc_prep(size)) == NULL) { + if ((cnt = prof_alloc_prep(usize)) == NULL) { ret = NULL; goto OOM; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && - size <= small_maxclass) { - ret = iralloc(ptr, small_maxclass+1); + usize <= small_maxclass) { + ret = iralloc(ptr, small_maxclass+1, 0, 0, + false, false); if (ret != NULL) - arena_prof_promoted(ret, size); + arena_prof_promoted(ret, usize); } else - ret = iralloc(ptr, size); + ret = iralloc(ptr, size, 0, 0, false, false); } else #endif - ret = iralloc(ptr, size); + { +#ifdef JEMALLOC_STATS + usize = s2u(size); +#endif + ret = iralloc(ptr, size, 0, 0, false, false); + } #ifdef JEMALLOC_PROF OOM: @@ -1133,10 +1236,8 @@ OOM: } } else { #ifdef JEMALLOC_PROF - if (opt_prof) { - old_size = 0; + if (opt_prof) old_ctx = NULL; - } #endif if (malloc_init()) { #ifdef JEMALLOC_PROF @@ -1147,23 +1248,29 @@ OOM: } else { #ifdef JEMALLOC_PROF if (opt_prof) { - if ((cnt = prof_alloc_prep(size)) == NULL) + usize = s2u(size); + if ((cnt = prof_alloc_prep(usize)) == NULL) ret = NULL; else { if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && size <= + (uintptr_t)1U && usize <= small_maxclass) { ret = imalloc(small_maxclass+1); if (ret != NULL) { arena_prof_promoted(ret, - size); + usize); } } else ret = imalloc(size); } } else #endif + { +#ifdef JEMALLOC_STATS + usize = s2u(size); +#endif ret = imalloc(size); + } } if (ret == NULL) { @@ -1183,7 +1290,13 @@ RETURN: #endif #ifdef JEMALLOC_PROF if (opt_prof) - 
prof_realloc(ret, cnt, ptr, old_size, old_ctx); + prof_realloc(ret, usize, cnt, old_size, old_ctx); +#endif +#ifdef JEMALLOC_STATS + if (ret != NULL) { + assert(usize == isalloc(ret)); + ALLOCATED_ADD(usize, old_size); + } #endif return (ret); } @@ -1194,12 +1307,26 @@ JEMALLOC_P(free)(void *ptr) { if (ptr != NULL) { +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize; +#endif + assert(malloc_initialized || malloc_initializer == pthread_self()); +#ifdef JEMALLOC_STATS + usize = isalloc(ptr); +#endif #ifdef JEMALLOC_PROF - if (opt_prof) - prof_free(ptr); + if (opt_prof) { +# ifndef JEMALLOC_STATS + usize = isalloc(ptr); +# endif + prof_free(ptr, usize); + } +#endif +#ifdef JEMALLOC_STATS + ALLOCATED_ADD(0, usize); #endif idalloc(ptr); } @@ -1210,6 +1337,57 @@ JEMALLOC_P(free)(void *ptr) */ /******************************************************************************/ /* + * Begin non-standard override functions. + * + * These overrides are omitted if the JEMALLOC_PREFIX is defined, since the + * entire point is to avoid accidental mixed allocator usage. + */ +#ifndef JEMALLOC_PREFIX + +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_ATTR(malloc) +JEMALLOC_ATTR(visibility("default")) +void * +JEMALLOC_P(memalign)(size_t alignment, size_t size) +{ + void *ret; +#ifdef JEMALLOC_CC_SILENCE + int result = +#endif + JEMALLOC_P(posix_memalign)(&ret, alignment, size); +#ifdef JEMALLOC_CC_SILENCE + if (result != 0) + return (NULL); +#endif + return (ret); +} +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_ATTR(malloc) +JEMALLOC_ATTR(visibility("default")) +void * +JEMALLOC_P(valloc)(size_t size) +{ + void *ret; +#ifdef JEMALLOC_CC_SILENCE + int result = +#endif + JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size); +#ifdef JEMALLOC_CC_SILENCE + if (result != 0) + return (NULL); +#endif + return (ret); +} +#endif + +#endif /* JEMALLOC_PREFIX */ +/* + * End non-standard override functions. + */ +/******************************************************************************/ +/* * Begin non-standard functions. */ @@ -1219,29 +1397,18 @@ JEMALLOC_P(malloc_usable_size)(const void *ptr) { size_t ret; + assert(malloc_initialized || malloc_initializer == pthread_self()); + +#ifdef JEMALLOC_IVSALLOC + ret = ivsalloc(ptr); +#else assert(ptr != NULL); ret = isalloc(ptr); +#endif return (ret); } -#ifdef JEMALLOC_SWAP -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(malloc_swap_enable)(const int *fds, unsigned nfds, int prezeroed) -{ - - /* - * Make sure malloc is initialized, because we need page size, chunk - * size, etc. - */ - if (malloc_init()) - return (-1); - - return (chunk_swap_enable(fds, nfds, (prezeroed != 0)) ? 
-1 : 0); -} -#endif - JEMALLOC_ATTR(visibility("default")) void JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), @@ -1286,6 +1453,247 @@ JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); } +JEMALLOC_INLINE void * +iallocm(size_t size, size_t alignment, bool zero) +{ + + if (alignment != 0) + return (ipalloc(size, alignment, zero)); + else if (zero) + return (icalloc(size)); + else + return (imalloc(size)); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) +{ + void *p; + size_t usize; + size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & ALLOCM_ZERO; +#ifdef JEMALLOC_PROF + prof_thr_cnt_t *cnt; +#endif + + assert(ptr != NULL); + assert(size != 0); + + if (malloc_init()) + goto OOM; + +#ifdef JEMALLOC_PROF + if (opt_prof) { + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, + NULL); + if ((cnt = prof_alloc_prep(usize)) == NULL) + goto OOM; + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= + small_maxclass) { + p = iallocm(small_maxclass+1, alignment, zero); + if (p == NULL) + goto OOM; + arena_prof_promoted(p, usize); + } else { + p = iallocm(size, alignment, zero); + if (p == NULL) + goto OOM; + } + + if (rsize != NULL) + *rsize = usize; + } else +#endif + { + p = iallocm(size, alignment, zero); + if (p == NULL) + goto OOM; +#ifndef JEMALLOC_STATS + if (rsize != NULL) +#endif + { + usize = (alignment == 0) ? s2u(size) : sa2u(size, + alignment, NULL); +#ifdef JEMALLOC_STATS + if (rsize != NULL) +#endif + *rsize = usize; + } + } + + *ptr = p; +#ifdef JEMALLOC_STATS + assert(usize == isalloc(p)); + ALLOCATED_ADD(usize, 0); +#endif + return (ALLOCM_SUCCESS); +OOM: +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write("<jemalloc>: Error in allocm(): " + "out of memory\n"); + abort(); + } +#endif + *ptr = NULL; + return (ALLOCM_ERR_OOM); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, + int flags) +{ + void *p, *q; + size_t usize; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t old_size; +#endif + size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & ALLOCM_ZERO; + bool no_move = flags & ALLOCM_NO_MOVE; +#ifdef JEMALLOC_PROF + prof_thr_cnt_t *cnt; + prof_ctx_t *old_ctx; +#endif + + assert(ptr != NULL); + assert(*ptr != NULL); + assert(size != 0); + assert(SIZE_T_MAX - size >= extra); + assert(malloc_initialized || malloc_initializer == pthread_self()); + + p = *ptr; +#ifdef JEMALLOC_PROF + if (opt_prof) { + /* + * usize isn't knowable before iralloc() returns when extra is + * non-zero. Therefore, compute its maximum possible value and + * use that in prof_alloc_prep() to decide whether to capture a + * backtrace. prof_realloc() will use the actual usize to + * decide whether to sample. + */ + size_t max_usize = (alignment == 0) ? s2u(size+extra) : + sa2u(size+extra, alignment, NULL); + old_size = isalloc(p); + old_ctx = prof_ctx_get(p); + if ((cnt = prof_alloc_prep(max_usize)) == NULL) + goto OOM; + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize + <= small_maxclass) { + q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= + size+extra) ? 
0 : size+extra - (small_maxclass+1), + alignment, zero, no_move); + if (q == NULL) + goto ERR; + usize = isalloc(q); + arena_prof_promoted(q, usize); + } else { + q = iralloc(p, size, extra, alignment, zero, no_move); + if (q == NULL) + goto ERR; + usize = isalloc(q); + } + prof_realloc(q, usize, cnt, old_size, old_ctx); + } else +#endif + { +#ifdef JEMALLOC_STATS + old_size = isalloc(p); +#endif + q = iralloc(p, size, extra, alignment, zero, no_move); + if (q == NULL) + goto ERR; +#ifndef JEMALLOC_STATS + if (rsize != NULL) +#endif + { + usize = isalloc(q); +#ifdef JEMALLOC_STATS + if (rsize != NULL) +#endif + *rsize = usize; + } + } + + *ptr = q; +#ifdef JEMALLOC_STATS + ALLOCATED_ADD(usize, old_size); +#endif + return (ALLOCM_SUCCESS); +ERR: + if (no_move) + return (ALLOCM_ERR_NOT_MOVED); +#ifdef JEMALLOC_PROF +OOM: +#endif +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write("<jemalloc>: Error in rallocm(): " + "out of memory\n"); + abort(); + } +#endif + return (ALLOCM_ERR_OOM); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) +{ + size_t sz; + + assert(malloc_initialized || malloc_initializer == pthread_self()); + +#ifdef JEMALLOC_IVSALLOC + sz = ivsalloc(ptr); +#else + assert(ptr != NULL); + sz = isalloc(ptr); +#endif + assert(rsize != NULL); + *rsize = sz; + + return (ALLOCM_SUCCESS); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(dallocm)(void *ptr, int flags) +{ +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize; +#endif + + assert(ptr != NULL); + assert(malloc_initialized || malloc_initializer == pthread_self()); + +#ifdef JEMALLOC_STATS + usize = isalloc(ptr); +#endif +#ifdef JEMALLOC_PROF + if (opt_prof) { +# ifndef JEMALLOC_STATS + usize = isalloc(ptr); +# endif + prof_free(ptr, usize); + } +#endif +#ifdef JEMALLOC_STATS + ALLOCATED_ADD(0, usize); +#endif + idalloc(ptr); + + return (ALLOCM_SUCCESS); +} + /* * End non-standard functions. */ @@ -1293,12 +1701,10 @@ JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, /* * The following functions are used by threading libraries for protection of - * malloc during fork(). These functions are only called if the program is - * running in threaded mode, so there is no need to check whether the program - * is threaded here. + * malloc during fork(). 
*/ -static void +void jemalloc_prefork(void) { unsigned i; @@ -1324,7 +1730,7 @@ jemalloc_prefork(void) #endif } -static void +void jemalloc_postfork(void) { unsigned i; @@ -1349,3 +1755,5 @@ jemalloc_postfork(void) } malloc_mutex_unlock(&arenas_lock); } + +/******************************************************************************/ diff --git a/jemalloc/src/mutex.c b/jemalloc/src/mutex.c index 3b6081a..3ecb18a 100644 --- a/jemalloc/src/mutex.c +++ b/jemalloc/src/mutex.c @@ -59,7 +59,11 @@ malloc_mutex_init(malloc_mutex_t *mutex) if (pthread_mutexattr_init(&attr) != 0) return (true); +#ifdef PTHREAD_MUTEX_ADAPTIVE_NP pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); +#else + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT); +#endif if (pthread_mutex_init(mutex, &attr) != 0) { pthread_mutexattr_destroy(&attr); return (true); @@ -68,3 +72,13 @@ malloc_mutex_init(malloc_mutex_t *mutex) return (false); } + +void +malloc_mutex_destroy(malloc_mutex_t *mutex) +{ + + if (pthread_mutex_destroy(mutex) != 0) { + malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n"); + abort(); + } +} diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c index 6d6910e..84ce1ba 100644 --- a/jemalloc/src/prof.c +++ b/jemalloc/src/prof.c @@ -12,8 +12,6 @@ #include <libunwind.h> #endif -#include <math.h> - /******************************************************************************/ /* Data. */ @@ -22,48 +20,30 @@ bool opt_prof_active = true; size_t opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; -bool opt_prof_udump = false; +bool opt_prof_gdump = false; bool opt_prof_leak = false; +bool opt_prof_accum = true; +ssize_t opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT; +char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; bool prof_promote; +unsigned prof_bt_max; + +#ifndef NO_TLS +__thread prof_tdata_t *prof_tdata_tls + JEMALLOC_ATTR(tls_model("initial-exec")); +#endif +pthread_key_t prof_tdata_tsd; + /* * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data - * structure that knows about all backtraces ever captured. + * structure that knows about all backtraces currently captured. */ static ckh_t bt2ctx; static malloc_mutex_t bt2ctx_mtx; -/* - * Thread-specific hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread - * keeps a cache of backtraces, with associated thread-specific prof_thr_cnt_t - * objects. Other threads may read the prof_thr_cnt_t contents, but no others - * will ever write them. - * - * Upon thread exit, the thread must merge all the prof_thr_cnt_t counter data - * into the associated prof_ctx_t objects, and unlink/free the prof_thr_cnt_t - * objects. - */ -static __thread ckh_t *bt2cnt_tls JEMALLOC_ATTR(tls_model("initial-exec")); - -/* - * Same contents as b2cnt_tls, but initialized such that the TSD destructor is - * called when a thread exits, so that bt2cnt_tls contents can be merged, - * unlinked, and deallocated. - */ -static pthread_key_t bt2cnt_tsd; - -/* (1U << opt_lg_prof_bt_max). 
*/ -static unsigned prof_bt_max; - -static __thread uint64_t prof_sample_prn_state - JEMALLOC_ATTR(tls_model("initial-exec")); -static __thread uint64_t prof_sample_threshold - JEMALLOC_ATTR(tls_model("initial-exec")); -static __thread uint64_t prof_sample_accum - JEMALLOC_ATTR(tls_model("initial-exec")); - static malloc_mutex_t prof_dump_seq_mtx; static uint64_t prof_dump_seq; static uint64_t prof_dump_iseq; @@ -85,26 +65,25 @@ static bool prof_booted = false; static malloc_mutex_t enq_mtx; static bool enq; static bool enq_idump; -static bool enq_udump; +static bool enq_gdump; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ static prof_bt_t *bt_dup(prof_bt_t *bt); -static void bt_init(prof_bt_t *bt, void **vec); +static void bt_destroy(prof_bt_t *bt); #ifdef JEMALLOC_PROF_LIBGCC static _Unwind_Reason_Code prof_unwind_init_callback( struct _Unwind_Context *context, void *arg); static _Unwind_Reason_Code prof_unwind_callback( struct _Unwind_Context *context, void *arg); #endif -static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); -static prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); -static void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); static bool prof_flush(bool propagate_err); static bool prof_write(const char *s, bool propagate_err); -static void prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all, +static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx); +static void prof_ctx_destroy(prof_ctx_t *ctx); +static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt); static bool prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err); static bool prof_dump_maps(bool propagate_err); @@ -115,11 +94,11 @@ static void prof_fdump(void); static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2); static bool prof_bt_keycomp(const void *k1, const void *k2); -static void bt2cnt_thread_cleanup(void *arg); +static void prof_tdata_cleanup(void *arg); /******************************************************************************/ -static void +void bt_init(prof_bt_t *bt, void **vec) { @@ -127,6 +106,13 @@ bt_init(prof_bt_t *bt, void **vec) bt->len = 0; } +static void +bt_destroy(prof_bt_t *bt) +{ + + idalloc(bt); +} + static prof_bt_t * bt_dup(prof_bt_t *bt) { @@ -165,7 +151,7 @@ prof_enter(void) static inline void prof_leave(void) { - bool idump, udump; + bool idump, gdump; malloc_mutex_unlock(&bt2ctx_mtx); @@ -173,14 +159,14 @@ prof_leave(void) enq = false; idump = enq_idump; enq_idump = false; - udump = enq_udump; - enq_udump = false; + gdump = enq_gdump; + enq_gdump = false; malloc_mutex_unlock(&enq_mtx); if (idump) prof_idump(); - if (udump) - prof_udump(); + if (gdump) + prof_gdump(); } #ifdef JEMALLOC_PROF_LIBGCC @@ -208,7 +194,7 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg) return (_URC_NO_REASON); } -static void +void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { prof_unwind_data_t data = {bt, nignore, max}; @@ -216,7 +202,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) _Unwind_Backtrace(prof_unwind_callback, &data); } #elif defined(JEMALLOC_PROF_LIBUNWIND) -static void +void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { unw_context_t uc; @@ -251,41 +237,29 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) } } #else -static void +void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { -#define NIGNORE 3 
#define BT_FRAME(i) \ - if ((i) < NIGNORE + max) { \ + if ((i) < nignore + max) { \ void *p; \ if (__builtin_frame_address(i) == 0) \ return; \ p = __builtin_return_address(i); \ if (p == NULL) \ return; \ - if (i >= NIGNORE) { \ - bt->vec[(i) - NIGNORE] = p; \ - bt->len = (i) - NIGNORE + 1; \ + if (i >= nignore) { \ + bt->vec[(i) - nignore] = p; \ + bt->len = (i) - nignore + 1; \ } \ } else \ return; assert(max <= (1U << opt_lg_prof_bt_max)); - /* - * Ignore the first three frames, since they are: - * - * 0: prof_backtrace() - * 1: prof_alloc_prep() - * 2: malloc(), calloc(), etc. - */ -#if 1 - assert(nignore + 1 == NIGNORE); -#else BT_FRAME(0) BT_FRAME(1) BT_FRAME(2) -#endif BT_FRAME(3) BT_FRAME(4) BT_FRAME(5) @@ -432,345 +406,119 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) } #endif -static prof_thr_cnt_t * +prof_thr_cnt_t * prof_lookup(prof_bt_t *bt) { - prof_thr_cnt_t *ret; - ckh_t *bt2cnt = bt2cnt_tls; - - if (bt2cnt == NULL) { - /* Initialize an empty cache for this thread. */ - bt2cnt = (ckh_t *)imalloc(sizeof(ckh_t)); - if (bt2cnt == NULL) + union { + prof_thr_cnt_t *p; + void *v; + } ret; + prof_tdata_t *prof_tdata; + + prof_tdata = PROF_TCACHE_GET(); + if (prof_tdata == NULL) { + prof_tdata = prof_tdata_init(); + if (prof_tdata == NULL) return (NULL); - if (ckh_new(bt2cnt, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) { - idalloc(bt2cnt); - return (NULL); - } - bt2cnt_tls = bt2cnt; - pthread_setspecific(bt2cnt_tsd, bt2cnt); } - if (ckh_search(bt2cnt, bt, NULL, (void **)&ret)) { - prof_bt_t *btkey; - prof_ctx_t *ctx; + if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { + union { + prof_bt_t *p; + void *v; + } btkey; + union { + prof_ctx_t *p; + void *v; + } ctx; /* * This thread's cache lacks bt. Look for it in the global * cache. */ prof_enter(); - if (ckh_search(&bt2ctx, bt, (void **)&btkey, (void **)&ctx)) { - + if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { /* bt has never been seen before. Insert it. */ - ctx = (prof_ctx_t *)imalloc(sizeof(prof_ctx_t)); - if (ctx == NULL) { + ctx.v = imalloc(sizeof(prof_ctx_t)); + if (ctx.v == NULL) { prof_leave(); return (NULL); } - btkey = bt_dup(bt); - if (btkey == NULL) { + btkey.p = bt_dup(bt); + if (btkey.v == NULL) { prof_leave(); - idalloc(ctx); + idalloc(ctx.v); return (NULL); } - ctx->bt = btkey; - if (malloc_mutex_init(&ctx->lock)) { + ctx.p->bt = btkey.p; + if (malloc_mutex_init(&ctx.p->lock)) { prof_leave(); - idalloc(btkey); - idalloc(ctx); + idalloc(btkey.v); + idalloc(ctx.v); return (NULL); } - memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t)); - ql_new(&ctx->cnts_ql); - if (ckh_insert(&bt2ctx, btkey, ctx)) { + memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); + ql_new(&ctx.p->cnts_ql); + if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { /* OOM. */ prof_leave(); - idalloc(btkey); - idalloc(ctx); + malloc_mutex_destroy(&ctx.p->lock); + idalloc(btkey.v); + idalloc(ctx.v); return (NULL); } } + /* + * Acquire ctx's lock before releasing bt2ctx_mtx, in order to + * avoid a race condition with prof_ctx_destroy(). + */ + malloc_mutex_lock(&ctx.p->lock); prof_leave(); /* Link a prof_thd_cnt_t into ctx for this thread. 
*/ - ret = (prof_thr_cnt_t *)imalloc(sizeof(prof_thr_cnt_t)); - if (ret == NULL) - return (NULL); - ql_elm_new(ret, link); - ret->ctx = ctx; - ret->epoch = 0; - memset(&ret->cnts, 0, sizeof(prof_cnt_t)); - if (ckh_insert(bt2cnt, btkey, ret)) { - idalloc(ret); - return (NULL); - } - malloc_mutex_lock(&ctx->lock); - ql_tail_insert(&ctx->cnts_ql, ret, link); - malloc_mutex_unlock(&ctx->lock); - } - - return (ret); -} - -static inline void -prof_sample_threshold_update(void) -{ - uint64_t r; - double u; - - /* - * Compute prof_sample_threshold as a geometrically distributed random - * variable with mean (2^opt_lg_prof_sample). - */ - prn64(r, 53, prof_sample_prn_state, (uint64_t)1125899906842625LLU, - 1058392653243283975); - u = (double)r * (1.0/9007199254740992.0L); - prof_sample_threshold = (uint64_t)(log(u) / - log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) - + (uint64_t)1U; -} - -prof_thr_cnt_t * -prof_alloc_prep(size_t size) -{ - prof_thr_cnt_t *ret; - void *vec[prof_bt_max]; - prof_bt_t bt; - - if (opt_prof_active == false) { - /* Sampling is currently inactive, so avoid sampling. */ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; - } else if (opt_lg_prof_sample == 0) { - /* - * Don't bother with sampling logic, since sampling interval is - * 1. - */ - bt_init(&bt, vec); - prof_backtrace(&bt, 2, prof_bt_max); - ret = prof_lookup(&bt); - } else { - if (prof_sample_threshold == 0) { + if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt) + == (ZU(1) << opt_lg_prof_tcmax)) { + assert(ckh_count(&prof_tdata->bt2cnt) > 0); /* - * Initialize. Seed the prng differently for each - * thread. + * Flush the least recently used cnt in order to keep + * bt2cnt from becoming too large. */ - prof_sample_prn_state = (uint64_t)(uintptr_t)&size; - prof_sample_threshold_update(); - } - - /* - * Determine whether to capture a backtrace based on whether - * size is enough for prof_accum to reach - * prof_sample_threshold. However, delay updating these - * variables until prof_{m,re}alloc(), because we don't know - * for sure that the allocation will succeed. - * - * Use subtraction rather than addition to avoid potential - * integer overflow. - */ - if (size >= prof_sample_threshold - prof_sample_accum) { - bt_init(&bt, vec); - prof_backtrace(&bt, 2, prof_bt_max); - ret = prof_lookup(&bt); - } else - ret = (prof_thr_cnt_t *)(uintptr_t)1U; - } - - return (ret); -} - -prof_ctx_t * -prof_ctx_get(const void *ptr) -{ - prof_ctx_t *ret; - arena_chunk_t *chunk; - - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); - - ret = arena_prof_ctx_get(ptr); - } else - ret = huge_prof_ctx_get(ptr); - - return (ret); -} - -static void -prof_ctx_set(const void *ptr, prof_ctx_t *ctx) -{ - arena_chunk_t *chunk; - - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); - - arena_prof_ctx_set(ptr, ctx); - } else - huge_prof_ctx_set(ptr, ctx); -} - -static inline void -prof_sample_accum_update(size_t size) -{ - - /* Sampling logic is unnecessary if the interval is 1. */ - assert(opt_lg_prof_sample != 0); - - /* Take care to avoid integer overflow. */ - if (size >= prof_sample_threshold - prof_sample_accum) { - prof_sample_accum -= (prof_sample_threshold - size); - /* Compute new prof_sample_threshold. 
*/ - prof_sample_threshold_update(); - while (prof_sample_accum >= prof_sample_threshold) { - prof_sample_accum -= prof_sample_threshold; - prof_sample_threshold_update(); + ret.p = ql_last(&prof_tdata->lru_ql, lru_link); + assert(ret.v != NULL); + ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL, + NULL); + ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); + prof_ctx_merge(ret.p->ctx, ret.p); + /* ret can now be re-used. */ + } else { + assert(opt_lg_prof_tcmax < 0 || + ckh_count(&prof_tdata->bt2cnt) < (ZU(1) << + opt_lg_prof_tcmax)); + /* Allocate and partially initialize a new cnt. */ + ret.v = imalloc(sizeof(prof_thr_cnt_t)); + if (ret.p == NULL) + return (NULL); + ql_elm_new(ret.p, cnts_link); + ql_elm_new(ret.p, lru_link); } - } else - prof_sample_accum += size; -} - -void -prof_malloc(const void *ptr, prof_thr_cnt_t *cnt) -{ - size_t size; - - assert(ptr != NULL); - - if (opt_lg_prof_sample != 0) { - size = isalloc(ptr); - prof_sample_accum_update(size); - } else if ((uintptr_t)cnt > (uintptr_t)1U) - size = isalloc(ptr); - - if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, cnt->ctx); - - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - cnt->cnts.curobjs++; - cnt->cnts.curbytes += size; - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += size; - /*********/ - mb_write(); - /*********/ - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - } else - prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); -} - -void -prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, - size_t old_size, prof_ctx_t *old_ctx) -{ - size_t size; - prof_thr_cnt_t *told_cnt; - - assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); - - if (ptr != NULL) { - if (opt_lg_prof_sample != 0) { - size = isalloc(ptr); - prof_sample_accum_update(size); - } else if ((uintptr_t)cnt > (uintptr_t)1U) - size = isalloc(ptr); - } - - if ((uintptr_t)old_ctx > (uintptr_t)1U) { - told_cnt = prof_lookup(old_ctx->bt); - if (told_cnt == NULL) { - /* - * It's too late to propagate OOM for this realloc(), - * so operate directly on old_cnt->ctx->cnt_merged. - */ - malloc_mutex_lock(&old_ctx->lock); - old_ctx->cnt_merged.curobjs--; - old_ctx->cnt_merged.curbytes -= old_size; - malloc_mutex_unlock(&old_ctx->lock); - told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + /* Finish initializing ret. */ + ret.p->ctx = ctx.p; + ret.p->epoch = 0; + memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); + if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { + idalloc(ret.v); + return (NULL); } - } else - told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - - if ((uintptr_t)told_cnt > (uintptr_t)1U) - told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, cnt->ctx); - cnt->epoch++; - } else - prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)told_cnt > (uintptr_t)1U) { - told_cnt->cnts.curobjs--; - told_cnt->cnts.curbytes -= old_size; - } - if ((uintptr_t)cnt > (uintptr_t)1U) { - cnt->cnts.curobjs++; - cnt->cnts.curbytes += size; - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += size; + ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); + ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); + malloc_mutex_unlock(&ctx.p->lock); + } else { + /* Move ret to the front of the LRU. 
*/ + ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); + ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); } - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)told_cnt > (uintptr_t)1U) - told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) - cnt->epoch++; - /*********/ - mb_write(); /* Not strictly necessary. */ -} -void -prof_free(const void *ptr) -{ - prof_ctx_t *ctx = prof_ctx_get(ptr); - - if ((uintptr_t)ctx > (uintptr_t)1) { - size_t size = isalloc(ptr); - prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); - - if (tcnt != NULL) { - tcnt->epoch++; - /*********/ - mb_write(); - /*********/ - tcnt->cnts.curobjs--; - tcnt->cnts.curbytes -= size; - /*********/ - mb_write(); - /*********/ - tcnt->epoch++; - /*********/ - mb_write(); - /*********/ - } else { - /* - * OOM during free() cannot be propagated, so operate - * directly on cnt->ctx->cnt_merged. - */ - malloc_mutex_lock(&ctx->lock); - ctx->cnt_merged.curobjs--; - ctx->cnt_merged.curbytes -= size; - malloc_mutex_unlock(&ctx->lock); - } - } + return (ret.p); } static bool @@ -823,15 +571,15 @@ prof_write(const char *s, bool propagate_err) } static void -prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) +prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) { prof_thr_cnt_t *thr_cnt; prof_cnt_t tcnt; malloc_mutex_lock(&ctx->lock); - memcpy(&ctx->cnt_dump, &ctx->cnt_merged, sizeof(prof_cnt_t)); - ql_foreach(thr_cnt, &ctx->cnts_ql, link) { + memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); + ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { volatile unsigned *epoch = &thr_cnt->epoch; while (true) { @@ -848,22 +596,77 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) break; } - ctx->cnt_dump.curobjs += tcnt.curobjs; - ctx->cnt_dump.curbytes += tcnt.curbytes; - ctx->cnt_dump.accumobjs += tcnt.accumobjs; - ctx->cnt_dump.accumbytes += tcnt.accumbytes; + ctx->cnt_summed.curobjs += tcnt.curobjs; + ctx->cnt_summed.curbytes += tcnt.curbytes; + if (opt_prof_accum) { + ctx->cnt_summed.accumobjs += tcnt.accumobjs; + ctx->cnt_summed.accumbytes += tcnt.accumbytes; + } + } + + if (ctx->cnt_summed.curobjs != 0) + (*leak_nctx)++; - if (tcnt.curobjs != 0) - (*leak_nctx)++; + /* Add to cnt_all. */ + cnt_all->curobjs += ctx->cnt_summed.curobjs; + cnt_all->curbytes += ctx->cnt_summed.curbytes; + if (opt_prof_accum) { + cnt_all->accumobjs += ctx->cnt_summed.accumobjs; + cnt_all->accumbytes += ctx->cnt_summed.accumbytes; } - /* Merge into cnt_all. */ - cnt_all->curobjs += ctx->cnt_dump.curobjs; - cnt_all->curbytes += ctx->cnt_dump.curbytes; - cnt_all->accumobjs += ctx->cnt_dump.accumobjs; - cnt_all->accumbytes += ctx->cnt_dump.accumbytes; + malloc_mutex_unlock(&ctx->lock); +} + +static void +prof_ctx_destroy(prof_ctx_t *ctx) +{ + /* + * Check that ctx is still unused by any thread cache before destroying + * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to + * avoid a race condition with this function. + */ + prof_enter(); + malloc_mutex_lock(&ctx->lock); + if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0) { + assert(ctx->cnt_merged.curbytes == 0); + assert(ctx->cnt_merged.accumobjs == 0); + assert(ctx->cnt_merged.accumbytes == 0); + /* Remove ctx from bt2ctx. */ + ckh_remove(&bt2ctx, ctx->bt, NULL, NULL); + prof_leave(); + /* Destroy ctx. 
*/ + malloc_mutex_unlock(&ctx->lock); + bt_destroy(ctx->bt); + malloc_mutex_destroy(&ctx->lock); + idalloc(ctx); + } else { + malloc_mutex_unlock(&ctx->lock); + prof_leave(); + } +} + +static void +prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) +{ + bool destroy; + + /* Merge cnt stats and detach from ctx. */ + malloc_mutex_lock(&ctx->lock); + ctx->cnt_merged.curobjs += cnt->cnts.curobjs; + ctx->cnt_merged.curbytes += cnt->cnts.curbytes; + ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; + ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; + ql_remove(&ctx->cnts_ql, cnt, cnts_link); + if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && + ctx->cnt_merged.curobjs == 0) + destroy = true; + else + destroy = false; malloc_mutex_unlock(&ctx->lock); + if (destroy) + prof_ctx_destroy(ctx); } static bool @@ -872,22 +675,29 @@ prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err) char buf[UMAX2S_BUFSIZE]; unsigned i; - if (prof_write(umax2s(ctx->cnt_dump.curobjs, 10, buf), propagate_err) + if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) { + assert(ctx->cnt_summed.curbytes == 0); + assert(ctx->cnt_summed.accumobjs == 0); + assert(ctx->cnt_summed.accumbytes == 0); + return (false); + } + + if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) - || prof_write(umax2s(ctx->cnt_dump.curbytes, 10, buf), + || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf), propagate_err) || prof_write(" [", propagate_err) - || prof_write(umax2s(ctx->cnt_dump.accumobjs, 10, buf), + || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) - || prof_write(umax2s(ctx->cnt_dump.accumbytes, 10, buf), + || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf), propagate_err) || prof_write("] @", propagate_err)) return (true); for (i = 0; i < bt->len; i++) { if (prof_write(" 0x", propagate_err) - || prof_write(umax2s((uintptr_t)bt->vec[i], 16, buf), + || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf), propagate_err)) return (true); } @@ -916,7 +726,7 @@ prof_dump_maps(bool propagate_err) memcpy(&mpath[i], s, slen); i += slen; - s = umax2s(getpid(), 10, buf); + s = u2s(getpid(), 10, buf); slen = strlen(s); memcpy(&mpath[i], s, slen); i += slen; @@ -958,8 +768,14 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) { prof_cnt_t cnt_all; size_t tabind; - prof_bt_t *bt; - prof_ctx_t *ctx; + union { + prof_bt_t *p; + void *v; + } bt; + union { + prof_ctx_t *p; + void *v; + } ctx; char buf[UMAX2S_BUFSIZE]; size_t leak_nctx; @@ -979,20 +795,18 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) /* Merge per thread profile stats, and sum them in cnt_all. */ memset(&cnt_all, 0, sizeof(prof_cnt_t)); leak_nctx = 0; - for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, (void **)&ctx) - == false;) { - prof_ctx_merge(ctx, &cnt_all, &leak_nctx); - } + for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) + prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx); /* Dump profile header. 
*/ if (prof_write("heap profile: ", propagate_err) - || prof_write(umax2s(cnt_all.curobjs, 10, buf), propagate_err) + || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) - || prof_write(umax2s(cnt_all.curbytes, 10, buf), propagate_err) + || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err) || prof_write(" [", propagate_err) - || prof_write(umax2s(cnt_all.accumobjs, 10, buf), propagate_err) + || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) - || prof_write(umax2s(cnt_all.accumbytes, 10, buf), propagate_err)) + || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err)) goto ERROR; if (opt_lg_prof_sample == 0) { @@ -1000,16 +814,16 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) goto ERROR; } else { if (prof_write("] @ heap_v2/", propagate_err) - || prof_write(umax2s((uint64_t)1U << opt_lg_prof_sample, 10, + || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10, buf), propagate_err) || prof_write("\n", propagate_err)) goto ERROR; } /* Dump per ctx profile stats. */ - for (tabind = 0; ckh_iter(&bt2ctx, &tabind, (void **)&bt, (void **)&ctx) + for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v) == false;) { - if (prof_dump_ctx(ctx, bt, propagate_err)) + if (prof_dump_ctx(ctx.p, bt.p, propagate_err)) goto ERROR; } @@ -1024,12 +838,12 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) if (leakcheck && cnt_all.curbytes != 0) { malloc_write("<jemalloc>: Leak summary: "); - malloc_write(umax2s(cnt_all.curbytes, 10, buf)); + malloc_write(u2s(cnt_all.curbytes, 10, buf)); malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, "); - malloc_write(umax2s(cnt_all.curobjs, 10, buf)); + malloc_write(u2s(cnt_all.curobjs, 10, buf)); malloc_write((cnt_all.curobjs != 1) ? " objects, " : " object, "); - malloc_write(umax2s(leak_nctx, 10, buf)); + malloc_write(u2s(leak_nctx, 10, buf)); malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n"); malloc_write("<jemalloc>: Run pprof on \""); malloc_write(filename); @@ -1059,31 +873,21 @@ prof_dump_filename(char *filename, char v, int64_t vseq) * Construct a filename of the form: * * <prefix>.<pid>.<seq>.v<vseq>.heap\0 - * or - * jeprof.<pid>.<seq>.v<vseq>.heap\0 */ i = 0; - /* - * Use JEMALLOC_PROF_PREFIX if it's set, and if it is short enough to - * avoid overflowing DUMP_FILENAME_BUFSIZE. The result may exceed - * PATH_MAX, but creat(2) will catch that problem. 
- */ - if ((s = getenv("JEMALLOC_PROF_PREFIX")) != NULL - && strlen(s) + (DUMP_FILENAME_BUFSIZE - PATH_MAX) <= PATH_MAX) { - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; + s = opt_prof_prefix; + slen = strlen(s); + memcpy(&filename[i], s, slen); + i += slen; - s = "."; - } else - s = "jeprof."; + s = "."; slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; - s = umax2s(getpid(), 10, buf); + s = u2s(getpid(), 10, buf); slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; @@ -1093,7 +897,7 @@ prof_dump_filename(char *filename, char v, int64_t vseq) memcpy(&filename[i], s, slen); i += slen; - s = umax2s(prof_dump_seq, 10, buf); + s = u2s(prof_dump_seq, 10, buf); prof_dump_seq++; slen = strlen(s); memcpy(&filename[i], s, slen); @@ -1108,7 +912,7 @@ prof_dump_filename(char *filename, char v, int64_t vseq) i++; if (vseq != 0xffffffffffffffffLLU) { - s = umax2s(vseq, 10, buf); + s = u2s(vseq, 10, buf); slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; @@ -1130,10 +934,12 @@ prof_fdump(void) if (prof_booted == false) return; - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU); - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, opt_prof_leak, false); + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU); + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(filename, opt_prof_leak, false); + } } void @@ -1151,11 +957,13 @@ prof_idump(void) } malloc_mutex_unlock(&enq_mtx); - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'i', prof_dump_iseq); - prof_dump_iseq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, false, false); + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'i', prof_dump_iseq); + prof_dump_iseq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(filename, false, false); + } } bool @@ -1168,6 +976,8 @@ prof_mdump(const char *filename) if (filename == NULL) { /* No filename specified, so automatically generate one. */ + if (opt_prof_prefix[0] == '\0') + return (true); malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename_buf, 'm', prof_dump_mseq); prof_dump_mseq++; @@ -1178,7 +988,7 @@ prof_mdump(const char *filename) } void -prof_udump(void) +prof_gdump(void) { char filename[DUMP_FILENAME_BUFSIZE]; @@ -1186,17 +996,19 @@ prof_udump(void) return; malloc_mutex_lock(&enq_mtx); if (enq) { - enq_udump = true; + enq_gdump = true; malloc_mutex_unlock(&enq_mtx); return; } malloc_mutex_unlock(&enq_mtx); - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'u', prof_dump_useq); - prof_dump_useq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, false, false); + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'u', prof_dump_useq); + prof_dump_useq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(filename, false, false); + } } static void @@ -1239,52 +1051,69 @@ prof_bt_keycomp(const void *k1, const void *k2) return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } -static void -bt2cnt_thread_cleanup(void *arg) +prof_tdata_t * +prof_tdata_init(void) { - ckh_t *bt2cnt; + prof_tdata_t *prof_tdata; - bt2cnt = bt2cnt_tls; - if (bt2cnt != NULL) { - ql_head(prof_thr_cnt_t) cnts_ql; - size_t tabind; - prof_thr_cnt_t *cnt; + /* Initialize an empty cache for this thread. 
*/ + prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); + if (prof_tdata == NULL) + return (NULL); - /* Iteratively merge cnt's into the global stats. */ - ql_new(&cnts_ql); - tabind = 0; - while (ckh_iter(bt2cnt, &tabind, NULL, (void **)&cnt) == - false) { - prof_ctx_t *ctx = cnt->ctx; - /* Merge stats and detach from ctx. */ - malloc_mutex_lock(&ctx->lock); - ctx->cnt_merged.curobjs += cnt->cnts.curobjs; - ctx->cnt_merged.curbytes += cnt->cnts.curbytes; - ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; - ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; - ql_remove(&ctx->cnts_ql, cnt, link); - malloc_mutex_unlock(&ctx->lock); + if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS, + prof_bt_hash, prof_bt_keycomp)) { + idalloc(prof_tdata); + return (NULL); + } + ql_new(&prof_tdata->lru_ql); - /* - * Stash cnt for deletion after finishing with - * ckh_iter(). - */ - ql_tail_insert(&cnts_ql, cnt, link); - } + prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max); + if (prof_tdata->vec == NULL) { + + ckh_delete(&prof_tdata->bt2cnt); + idalloc(prof_tdata); + return (NULL); + } + + prof_tdata->prn_state = 0; + prof_tdata->threshold = 0; + prof_tdata->accum = 0; + + PROF_TCACHE_SET(prof_tdata); + + return (prof_tdata); +} + +static void +prof_tdata_cleanup(void *arg) +{ + prof_tdata_t *prof_tdata; + + prof_tdata = PROF_TCACHE_GET(); + if (prof_tdata != NULL) { + prof_thr_cnt_t *cnt; /* - * Delete the hash table now that cnts_ql has a list of all - * cnt's. + * Delete the hash table. All of its contents can still be + * iterated over via the LRU. */ - ckh_delete(bt2cnt); - idalloc(bt2cnt); - bt2cnt_tls = NULL; + ckh_delete(&prof_tdata->bt2cnt); - /* Delete cnt's. */ - while ((cnt = ql_last(&cnts_ql, link)) != NULL) { - ql_remove(&cnts_ql, cnt, link); + /* + * Iteratively merge cnt's into the global stats and delete + * them. + */ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + prof_ctx_merge(cnt->ctx, cnt); + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); idalloc(cnt); } + + idalloc(prof_tdata->vec); + + idalloc(prof_tdata); + PROF_TCACHE_SET(NULL); } } @@ -1292,6 +1121,14 @@ void prof_boot0(void) { + memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, + sizeof(PROF_PREFIX_DEFAULT)); +} + +void +prof_boot1(void) +{ + /* * opt_prof and prof_promote must be in their final state before any * arenas are initialized, so this function must be executed early. @@ -1303,7 +1140,7 @@ prof_boot0(void) * automatically dumped. 
*/ opt_prof = true; - opt_prof_udump = false; + opt_prof_gdump = false; prof_interval = 0; } else if (opt_prof) { if (opt_lg_prof_interval >= 0) { @@ -1317,7 +1154,7 @@ prof_boot0(void) } bool -prof_boot1(void) +prof_boot2(void) { if (opt_prof) { @@ -1326,7 +1163,7 @@ prof_boot1(void) return (true); if (malloc_mutex_init(&bt2ctx_mtx)) return (true); - if (pthread_key_create(&bt2cnt_tsd, bt2cnt_thread_cleanup) + if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup) != 0) { malloc_write( "<jemalloc>: Error in pthread_key_create()\n"); @@ -1341,7 +1178,7 @@ prof_boot1(void) return (true); enq = false; enq_idump = false; - enq_udump = false; + enq_gdump = false; if (atexit(prof_fdump) != 0) { malloc_write("<jemalloc>: Error in atexit()\n"); diff --git a/jemalloc/src/rtree.c b/jemalloc/src/rtree.c new file mode 100644 index 0000000..7753743 --- /dev/null +++ b/jemalloc/src/rtree.c @@ -0,0 +1,43 @@ +#define RTREE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +rtree_t * +rtree_new(unsigned bits) +{ + rtree_t *ret; + unsigned bits_per_level, height, i; + + bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; + height = bits / bits_per_level; + if (height * bits_per_level != bits) + height++; + assert(height * bits_per_level >= bits); + + ret = (rtree_t*)base_alloc(offsetof(rtree_t, level2bits) + + (sizeof(unsigned) * height)); + if (ret == NULL) + return (NULL); + memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * + height)); + + malloc_mutex_init(&ret->mutex); + ret->height = height; + if (bits_per_level * height > bits) + ret->level2bits[0] = bits % bits_per_level; + else + ret->level2bits[0] = bits_per_level; + for (i = 1; i < height; i++) + ret->level2bits[i] = bits_per_level; + + ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]); + if (ret->root == NULL) { + /* + * We leak the rtree here, since there's no generic base + * deallocation. + */ + return (NULL); + } + memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]); + + return (ret); +} diff --git a/jemalloc/src/stats.c b/jemalloc/src/stats.c index 9dc7529..3dfe0d2 100644 --- a/jemalloc/src/stats.c +++ b/jemalloc/src/stats.c @@ -57,12 +57,12 @@ static void stats_arena_print(void (*write_cb)(void *, const char *), /* * We don't want to depend on vsnprintf() for production builds, since that can - * cause unnecessary bloat for static binaries. umax2s() provides minimal - * integer printing functionality, so that malloc_printf() use can be limited to + * cause unnecessary bloat for static binaries. u2s() provides minimal integer + * printing functionality, so that malloc_printf() use can be limited to * JEMALLOC_STATS code. 
*/ char * -umax2s(uintmax_t x, unsigned base, char *s) +u2s(uint64_t x, unsigned base, char *s) { unsigned i; @@ -72,8 +72,8 @@ umax2s(uintmax_t x, unsigned base, char *s) case 10: do { i--; - s[i] = "0123456789"[x % 10]; - x /= 10; + s[i] = "0123456789"[x % (uint64_t)10]; + x /= (uint64_t)10; } while (x > 0); break; case 16: @@ -86,8 +86,9 @@ umax2s(uintmax_t x, unsigned base, char *s) default: do { i--; - s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % base]; - x /= base; + s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % + (uint64_t)base]; + x /= (uint64_t)base; } while (x > 0); } @@ -374,6 +375,7 @@ void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { + int err; uint64_t epoch; size_t u64sz; char s[UMAX2S_BUFSIZE]; @@ -383,10 +385,27 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool bins = true; bool large = true; - /* Refresh stats, in case mallctl() was called by the application. */ + /* + * Refresh stats, in case mallctl() was called by the application. + * + * Check for OOM here, since refreshing the ctl cache can trigger + * allocation. In practice, none of the subsequent mallctl()-related + * calls in this function will cause OOM if this one succeeds. + * */ epoch = 1; u64sz = sizeof(uint64_t); - xmallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t)); + err = JEMALLOC_P(mallctl)("epoch", &epoch, &u64sz, &epoch, + sizeof(uint64_t)); + if (err != 0) { + if (err == EAGAIN) { + malloc_write("<jemalloc>: Memory allocation failure in " + "mallctl(\"epoch\", ...)\n"); + return; + } + malloc_write("<jemalloc>: Failure in mallctl(\"epoch\", " + "...)\n"); + abort(); + } if (write_cb == NULL) { /* @@ -430,10 +449,12 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool bv; unsigned uv; ssize_t ssv; - size_t sv, bsz, ssz; + size_t sv, bsz, ssz, sssz, cpsz; bsz = sizeof(bool); ssz = sizeof(size_t); + sssz = sizeof(ssize_t); + cpsz = sizeof(const char *); CTL_GET("version", &cpv, const char *); write_cb(cbopaque, "Version: "); @@ -444,113 +465,140 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, bv ? "enabled" : "disabled"); write_cb(cbopaque, "\n"); - write_cb(cbopaque, "Boolean JEMALLOC_OPTIONS: "); - if ((err = JEMALLOC_P(mallctl)("opt.abort", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? "A" : "a"); - if ((err = JEMALLOC_P(mallctl)("prof.active", &bv, &bsz, - NULL, 0)) == 0) - write_cb(cbopaque, bv ? "E" : "e"); - if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? "F" : "f"); - if ((err = JEMALLOC_P(mallctl)("opt.tcache", &bv, &bsz, NULL, - 0)) == 0) - write_cb(cbopaque, bv ? "H" : "h"); - if ((err = JEMALLOC_P(mallctl)("opt.junk", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? "J" : "j"); - if ((err = JEMALLOC_P(mallctl)("opt.prof_leak", &bv, &bsz, NULL, - 0)) == 0) - write_cb(cbopaque, bv ? "L" : "l"); - if ((err = JEMALLOC_P(mallctl)("opt.overcommit", &bv, &bsz, - NULL, 0)) == 0) - write_cb(cbopaque, bv ? "O" : "o"); - if ((err = JEMALLOC_P(mallctl)("opt.stats_print", &bv, &bsz, - NULL, 0)) == 0) - write_cb(cbopaque, bv ? "P" : "p"); - if ((err = JEMALLOC_P(mallctl)("opt.prof_udump", &bv, &bsz, - NULL, 0)) == 0) - write_cb(cbopaque, bv ? "U" : "u"); - if ((err = JEMALLOC_P(mallctl)("opt.sysv", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? "V" : "v"); - if ((err = JEMALLOC_P(mallctl)("opt.xmalloc", &bv, &bsz, NULL, - 0)) == 0) - write_cb(cbopaque, bv ? 
"X" : "x"); - if ((err = JEMALLOC_P(mallctl)("opt.zero", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? "Z" : "z"); - write_cb(cbopaque, "\n"); +#define OPT_WRITE_BOOL(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &bv, &bsz, \ + NULL, 0)) == 0) { \ + write_cb(cbopaque, " opt."#n": "); \ + write_cb(cbopaque, bv ? "true" : "false"); \ + write_cb(cbopaque, "\n"); \ + } +#define OPT_WRITE_SIZE_T(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &sv, &ssz, \ + NULL, 0)) == 0) { \ + write_cb(cbopaque, " opt."#n": "); \ + write_cb(cbopaque, u2s(sv, 10, s)); \ + write_cb(cbopaque, "\n"); \ + } +#define OPT_WRITE_SSIZE_T(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &ssv, &sssz, \ + NULL, 0)) == 0) { \ + if (ssv >= 0) { \ + write_cb(cbopaque, " opt."#n": "); \ + write_cb(cbopaque, u2s(ssv, 10, s)); \ + } else { \ + write_cb(cbopaque, " opt."#n": -"); \ + write_cb(cbopaque, u2s(-ssv, 10, s)); \ + } \ + write_cb(cbopaque, "\n"); \ + } +#define OPT_WRITE_CHAR_P(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &cpv, &cpsz, \ + NULL, 0)) == 0) { \ + write_cb(cbopaque, " opt."#n": \""); \ + write_cb(cbopaque, cpv); \ + write_cb(cbopaque, "\"\n"); \ + } + + write_cb(cbopaque, "Run-time option settings:\n"); + OPT_WRITE_BOOL(abort) + OPT_WRITE_SIZE_T(lg_qspace_max) + OPT_WRITE_SIZE_T(lg_cspace_max) + OPT_WRITE_SIZE_T(lg_chunk) + OPT_WRITE_SIZE_T(narenas) + OPT_WRITE_SSIZE_T(lg_dirty_mult) + OPT_WRITE_BOOL(stats_print) + OPT_WRITE_BOOL(junk) + OPT_WRITE_BOOL(zero) + OPT_WRITE_BOOL(sysv) + OPT_WRITE_BOOL(xmalloc) + OPT_WRITE_BOOL(tcache) + OPT_WRITE_SSIZE_T(lg_tcache_gc_sweep) + OPT_WRITE_SSIZE_T(lg_tcache_max) + OPT_WRITE_BOOL(prof) + OPT_WRITE_CHAR_P(prof_prefix) + OPT_WRITE_SIZE_T(lg_prof_bt_max) + OPT_WRITE_BOOL(prof_active) + OPT_WRITE_SSIZE_T(lg_prof_sample) + OPT_WRITE_BOOL(prof_accum) + OPT_WRITE_SSIZE_T(lg_prof_tcmax) + OPT_WRITE_SSIZE_T(lg_prof_interval) + OPT_WRITE_BOOL(prof_gdump) + OPT_WRITE_BOOL(prof_leak) + OPT_WRITE_BOOL(overcommit) + +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_SIZE_T +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_CHAR_P write_cb(cbopaque, "CPUs: "); - write_cb(cbopaque, umax2s(ncpus, 10, s)); + write_cb(cbopaque, u2s(ncpus, 10, s)); write_cb(cbopaque, "\n"); CTL_GET("arenas.narenas", &uv, unsigned); write_cb(cbopaque, "Max arenas: "); - write_cb(cbopaque, umax2s(uv, 10, s)); + write_cb(cbopaque, u2s(uv, 10, s)); write_cb(cbopaque, "\n"); write_cb(cbopaque, "Pointer size: "); - write_cb(cbopaque, umax2s(sizeof(void *), 10, s)); + write_cb(cbopaque, u2s(sizeof(void *), 10, s)); write_cb(cbopaque, "\n"); CTL_GET("arenas.quantum", &sv, size_t); write_cb(cbopaque, "Quantum size: "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); CTL_GET("arenas.cacheline", &sv, size_t); write_cb(cbopaque, "Cacheline size (assumed): "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); CTL_GET("arenas.subpage", &sv, size_t); write_cb(cbopaque, "Subpage spacing: "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz, NULL, 0)) == 0) { write_cb(cbopaque, "Tiny 2^n-spaced sizes: ["); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ".."); CTL_GET("arenas.tspace_max", &sv, size_t); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "]\n"); } CTL_GET("arenas.qspace_min", &sv, 
size_t); write_cb(cbopaque, "Quantum-spaced sizes: ["); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ".."); CTL_GET("arenas.qspace_max", &sv, size_t); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "]\n"); CTL_GET("arenas.cspace_min", &sv, size_t); write_cb(cbopaque, "Cacheline-spaced sizes: ["); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ".."); CTL_GET("arenas.cspace_max", &sv, size_t); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "]\n"); CTL_GET("arenas.sspace_min", &sv, size_t); write_cb(cbopaque, "Subpage-spaced sizes: ["); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ".."); CTL_GET("arenas.sspace_max", &sv, size_t); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "]\n"); CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); if (ssv >= 0) { write_cb(cbopaque, "Min active:dirty page ratio per arena: "); - write_cb(cbopaque, umax2s((1U << ssv), 10, s)); + write_cb(cbopaque, u2s((1U << ssv), 10, s)); write_cb(cbopaque, ":1\n"); } else { write_cb(cbopaque, @@ -560,7 +608,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, &ssz, NULL, 0)) == 0) { write_cb(cbopaque, "Maximum thread-cached size class: "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); } if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv, @@ -570,39 +618,51 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("opt.tcache", &tcache_enabled, bool); write_cb(cbopaque, "Thread cache GC sweep interval: "); write_cb(cbopaque, tcache_enabled && ssv >= 0 ? 
- umax2s(tcache_gc_sweep, 10, s) : "N/A"); + u2s(tcache_gc_sweep, 10, s) : "N/A"); write_cb(cbopaque, "\n"); } if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0)) == 0 && bv) { CTL_GET("opt.lg_prof_bt_max", &sv, size_t); write_cb(cbopaque, "Maximum profile backtrace depth: "); - write_cb(cbopaque, umax2s((1U << sv), 10, s)); + write_cb(cbopaque, u2s((1U << sv), 10, s)); write_cb(cbopaque, "\n"); + CTL_GET("opt.lg_prof_tcmax", &ssv, ssize_t); + write_cb(cbopaque, + "Maximum per thread backtrace cache: "); + if (ssv >= 0) { + write_cb(cbopaque, u2s((1U << ssv), 10, s)); + write_cb(cbopaque, " (2^"); + write_cb(cbopaque, u2s(ssv, 10, s)); + write_cb(cbopaque, ")\n"); + } else + write_cb(cbopaque, "N/A\n"); + CTL_GET("opt.lg_prof_sample", &sv, size_t); write_cb(cbopaque, "Average profile sample interval: "); - write_cb(cbopaque, umax2s((1U << sv), 10, s)); + write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s)); write_cb(cbopaque, " (2^"); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ")\n"); CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); write_cb(cbopaque, "Average profile dump interval: "); if (ssv >= 0) { - write_cb(cbopaque, umax2s((1U << ssv), 10, s)); + write_cb(cbopaque, u2s((((uint64_t)1U) << ssv), + 10, s)); write_cb(cbopaque, " (2^"); - write_cb(cbopaque, umax2s(ssv, 10, s)); + write_cb(cbopaque, u2s(ssv, 10, s)); write_cb(cbopaque, ")\n"); } else write_cb(cbopaque, "N/A\n"); } CTL_GET("arenas.chunksize", &sv, size_t); write_cb(cbopaque, "Chunk size: "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); CTL_GET("opt.lg_chunk", &sv, size_t); write_cb(cbopaque, " (2^"); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ")\n"); } diff --git a/jemalloc/src/tcache.c b/jemalloc/src/tcache.c index ace24ce..cbbe7a1 100644 --- a/jemalloc/src/tcache.c +++ b/jemalloc/src/tcache.c @@ -5,17 +5,19 @@ /* Data. */ bool opt_tcache = true; -ssize_t opt_lg_tcache_maxclass = LG_TCACHE_MAXCLASS_DEFAULT; +ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT; /* Map of thread-specific caches. */ +#ifndef NO_TLS __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); +#endif /* * Same contents as tcache, but initialized such that the TSD destructor is * called when a thread exits, so that the cache can be cleaned up. */ -static pthread_key_t tcache_tsd; +pthread_key_t tcache_tsd; size_t nhbins; size_t tcache_maxclass; @@ -93,10 +95,10 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem flush = *(void **)ptr; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) { - size_t pageind = (((uintptr_t)ptr - - (uintptr_t)chunk) >> PAGE_SHIFT); + size_t pageind = ((uintptr_t)ptr - + (uintptr_t)chunk) >> PAGE_SHIFT; arena_chunk_map_t *mapelm = - &chunk->map[pageind]; + &chunk->map[pageind-map_bias]; arena_dalloc_bin(arena, chunk, ptr, mapelm); } else { /* @@ -202,12 +204,14 @@ tcache_create(arena_t *arena) size_t size; unsigned i; - size = sizeof(tcache_t) + (sizeof(tcache_bin_t) * (nhbins - 1)); + size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins); /* * Round up to the nearest multiple of the cacheline size, in order to * avoid the possibility of false cacheline sharing. * - * That this works relies on the same logic as in ipalloc(). 
+ * That this works relies on the same logic as in ipalloc(), but we + * cannot directly call ipalloc() here due to tcache bootstrapping + * issues. */ size = (size + CACHELINE_MASK) & (-CACHELINE); @@ -239,8 +243,7 @@ tcache_create(arena_t *arena) for (; i < nhbins; i++) tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE; - tcache_tls = tcache; - pthread_setspecific(tcache_tsd, tcache); + TCACHE_SET(tcache); return (tcache); } @@ -308,9 +311,9 @@ tcache_destroy(tcache_t *tcache) if (arena_salloc(tcache) <= small_maxclass) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; - size_t pageind = (((uintptr_t)tcache - (uintptr_t)chunk) >> - PAGE_SHIFT); - arena_chunk_map_t *mapelm = &chunk->map[pageind]; + size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> + PAGE_SHIFT; + arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); @@ -328,11 +331,24 @@ tcache_thread_cleanup(void *arg) { tcache_t *tcache = (tcache_t *)arg; - assert(tcache == tcache_tls); - if (tcache != NULL) { + if (tcache == (void *)(uintptr_t)1) { + /* + * The previous time this destructor was called, we set the key + * to 1 so that other destructors wouldn't cause re-creation of + * the tcache. This time, do nothing, so that the destructor + * will not be called again. + */ + } else if (tcache == (void *)(uintptr_t)2) { + /* + * Another destructor called an allocator function after this + * destructor was called. Reset tcache to 1 in order to + * receive another callback. + */ + TCACHE_SET((uintptr_t)1); + } else if (tcache != NULL) { assert(tcache != (void *)(uintptr_t)1); tcache_destroy(tcache); - tcache_tls = (void *)(uintptr_t)1; + TCACHE_SET((uintptr_t)1); } } @@ -368,16 +384,16 @@ tcache_boot(void) if (opt_tcache) { /* - * If necessary, clamp opt_lg_tcache_maxclass, now that + * If necessary, clamp opt_lg_tcache_max, now that * small_maxclass and arena_maxclass are known. */ - if (opt_lg_tcache_maxclass < 0 || (1U << - opt_lg_tcache_maxclass) < small_maxclass) + if (opt_lg_tcache_max < 0 || (1U << + opt_lg_tcache_max) < small_maxclass) tcache_maxclass = small_maxclass; - else if ((1U << opt_lg_tcache_maxclass) > arena_maxclass) + else if ((1U << opt_lg_tcache_max) > arena_maxclass) tcache_maxclass = arena_maxclass; else - tcache_maxclass = (1U << opt_lg_tcache_maxclass); + tcache_maxclass = (1U << opt_lg_tcache_max); nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT); diff --git a/jemalloc/src/zone.c b/jemalloc/src/zone.c new file mode 100644 index 0000000..2c1b231 --- /dev/null +++ b/jemalloc/src/zone.c @@ -0,0 +1,354 @@ +#include "jemalloc/internal/jemalloc_internal.h" +#ifndef JEMALLOC_ZONE +# error "This source file is for zones on Darwin (OS X)." +#endif + +/******************************************************************************/ +/* Data. */ + +static malloc_zone_t zone, szone; +static struct malloc_introspection_t zone_introspect, ozone_introspect; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. 
*/ + +static size_t zone_size(malloc_zone_t *zone, void *ptr); +static void *zone_malloc(malloc_zone_t *zone, size_t size); +static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size); +static void *zone_valloc(malloc_zone_t *zone, size_t size); +static void zone_free(malloc_zone_t *zone, void *ptr); +static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); +#if (JEMALLOC_ZONE_VERSION >= 6) +static void *zone_memalign(malloc_zone_t *zone, size_t alignment, + size_t size); +static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, + size_t size); +#endif +static void *zone_destroy(malloc_zone_t *zone); +static size_t zone_good_size(malloc_zone_t *zone, size_t size); +static void zone_force_lock(malloc_zone_t *zone); +static void zone_force_unlock(malloc_zone_t *zone); +static size_t ozone_size(malloc_zone_t *zone, void *ptr); +static void ozone_free(malloc_zone_t *zone, void *ptr); +static void *ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size); +static unsigned ozone_batch_malloc(malloc_zone_t *zone, size_t size, + void **results, unsigned num_requested); +static void ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, + unsigned num); +#if (JEMALLOC_ZONE_VERSION >= 6) +static void ozone_free_definite_size(malloc_zone_t *zone, void *ptr, + size_t size); +#endif +static void ozone_force_lock(malloc_zone_t *zone); +static void ozone_force_unlock(malloc_zone_t *zone); + +/******************************************************************************/ +/* + * Functions. + */ + +static size_t +zone_size(malloc_zone_t *zone, void *ptr) +{ + + /* + * There appear to be places within Darwin (such as setenv(3)) that + * cause calls to this function with pointers that *no* zone owns. If + * we knew that all pointers were owned by *some* zone, we could split + * our zone into two parts, and use one as the default allocator and + * the other as the default deallocator/reallocator. Since that will + * not work in practice, we must check all pointers to assure that they + * reside within a mapped chunk before determining size. + */ + return (ivsalloc(ptr)); +} + +static void * +zone_malloc(malloc_zone_t *zone, size_t size) +{ + + return (JEMALLOC_P(malloc)(size)); +} + +static void * +zone_calloc(malloc_zone_t *zone, size_t num, size_t size) +{ + + return (JEMALLOC_P(calloc)(num, size)); +} + +static void * +zone_valloc(malloc_zone_t *zone, size_t size) +{ + void *ret = NULL; /* Assignment avoids useless compiler warning. */ + + JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size); + + return (ret); +} + +static void +zone_free(malloc_zone_t *zone, void *ptr) +{ + + JEMALLOC_P(free)(ptr); +} + +static void * +zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) +{ + + return (JEMALLOC_P(realloc)(ptr, size)); +} + +#if (JEMALLOC_ZONE_VERSION >= 6) +static void * +zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) +{ + void *ret = NULL; /* Assignment avoids useless compiler warning. */ + + JEMALLOC_P(posix_memalign)(&ret, alignment, size); + + return (ret); +} + +static void +zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) +{ + + assert(ivsalloc(ptr) == size); + JEMALLOC_P(free)(ptr); +} +#endif + +static void * +zone_destroy(malloc_zone_t *zone) +{ + + /* This function should never be called. 
*/ + assert(false); + return (NULL); +} + +static size_t +zone_good_size(malloc_zone_t *zone, size_t size) +{ + size_t ret; + void *p; + + /* + * Actually create an object of the appropriate size, then find out + * how large it could have been without moving up to the next size + * class. + */ + p = JEMALLOC_P(malloc)(size); + if (p != NULL) { + ret = isalloc(p); + JEMALLOC_P(free)(p); + } else + ret = size; + + return (ret); +} + +static void +zone_force_lock(malloc_zone_t *zone) +{ + + if (isthreaded) + jemalloc_prefork(); +} + +static void +zone_force_unlock(malloc_zone_t *zone) +{ + + if (isthreaded) + jemalloc_postfork(); +} + +malloc_zone_t * +create_zone(void) +{ + + zone.size = (void *)zone_size; + zone.malloc = (void *)zone_malloc; + zone.calloc = (void *)zone_calloc; + zone.valloc = (void *)zone_valloc; + zone.free = (void *)zone_free; + zone.realloc = (void *)zone_realloc; + zone.destroy = (void *)zone_destroy; + zone.zone_name = "jemalloc_zone"; + zone.batch_malloc = NULL; + zone.batch_free = NULL; + zone.introspect = &zone_introspect; + zone.version = JEMALLOC_ZONE_VERSION; +#if (JEMALLOC_ZONE_VERSION >= 6) + zone.memalign = zone_memalign; + zone.free_definite_size = zone_free_definite_size; +#endif + + zone_introspect.enumerator = NULL; + zone_introspect.good_size = (void *)zone_good_size; + zone_introspect.check = NULL; + zone_introspect.print = NULL; + zone_introspect.log = NULL; + zone_introspect.force_lock = (void *)zone_force_lock; + zone_introspect.force_unlock = (void *)zone_force_unlock; + zone_introspect.statistics = NULL; +#if (JEMALLOC_ZONE_VERSION >= 6) + zone_introspect.zone_locked = NULL; +#endif + + return (&zone); +} + +static size_t +ozone_size(malloc_zone_t *zone, void *ptr) +{ + size_t ret; + + ret = ivsalloc(ptr); + if (ret == 0) + ret = szone.size(zone, ptr); + + return (ret); +} + +static void +ozone_free(malloc_zone_t *zone, void *ptr) +{ + + if (ivsalloc(ptr) != 0) + JEMALLOC_P(free)(ptr); + else { + size_t size = szone.size(zone, ptr); + if (size != 0) + (szone.free)(zone, ptr); + } +} + +static void * +ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size) +{ + size_t oldsize; + + if (ptr == NULL) + return (JEMALLOC_P(malloc)(size)); + + oldsize = ivsalloc(ptr); + if (oldsize != 0) + return (JEMALLOC_P(realloc)(ptr, size)); + else { + oldsize = szone.size(zone, ptr); + if (oldsize == 0) + return (JEMALLOC_P(malloc)(size)); + else { + void *ret = JEMALLOC_P(malloc)(size); + if (ret != NULL) { + memcpy(ret, ptr, (oldsize < size) ? oldsize : + size); + (szone.free)(zone, ptr); + } + return (ret); + } + } +} + +static unsigned +ozone_batch_malloc(malloc_zone_t *zone, size_t size, void **results, + unsigned num_requested) +{ + + /* Don't bother implementing this interface, since it isn't required. */ + return (0); +} + +static void +ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, unsigned num) +{ + unsigned i; + + for (i = 0; i < num; i++) + ozone_free(zone, to_be_freed[i]); +} + +#if (JEMALLOC_ZONE_VERSION >= 6) +static void +ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) +{ + + if (ivsalloc(ptr) != 0) { + assert(ivsalloc(ptr) == size); + JEMALLOC_P(free)(ptr); + } else { + assert(size == szone.size(zone, ptr)); + szone.free_definite_size(zone, ptr, size); + } +} +#endif + +static void +ozone_force_lock(malloc_zone_t *zone) +{ + + /* jemalloc locking is taken care of by the normal jemalloc zone. 
*/ + szone.introspect->force_lock(zone); +} + +static void +ozone_force_unlock(malloc_zone_t *zone) +{ + + /* jemalloc locking is taken care of by the normal jemalloc zone. */ + szone.introspect->force_unlock(zone); +} + +/* + * Overlay the default scalable zone (szone) such that existing allocations are + * drained, and further allocations come from jemalloc. This is necessary + * because Core Foundation directly accesses and uses the szone before the + * jemalloc library is even loaded. + */ +void +szone2ozone(malloc_zone_t *zone) +{ + + /* + * Stash a copy of the original szone so that we can call its + * functions as needed. Note that the internally, the szone stores its + * bookkeeping data structures immediately following the malloc_zone_t + * header, so when calling szone functions, we need to pass a pointer + * to the original zone structure. + */ + memcpy(&szone, zone, sizeof(malloc_zone_t)); + + zone->size = (void *)ozone_size; + zone->malloc = (void *)zone_malloc; + zone->calloc = (void *)zone_calloc; + zone->valloc = (void *)zone_valloc; + zone->free = (void *)ozone_free; + zone->realloc = (void *)ozone_realloc; + zone->destroy = (void *)zone_destroy; + zone->zone_name = "jemalloc_ozone"; + zone->batch_malloc = ozone_batch_malloc; + zone->batch_free = ozone_batch_free; + zone->introspect = &ozone_introspect; + zone->version = JEMALLOC_ZONE_VERSION; +#if (JEMALLOC_ZONE_VERSION >= 6) + zone->memalign = zone_memalign; + zone->free_definite_size = ozone_free_definite_size; +#endif + + ozone_introspect.enumerator = NULL; + ozone_introspect.good_size = (void *)zone_good_size; + ozone_introspect.check = NULL; + ozone_introspect.print = NULL; + ozone_introspect.log = NULL; + ozone_introspect.force_lock = (void *)ozone_force_lock; + ozone_introspect.force_unlock = (void *)ozone_force_unlock; + ozone_introspect.statistics = NULL; +#if (JEMALLOC_ZONE_VERSION >= 6) + ozone_introspect.zone_locked = NULL; +#endif +} |
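
The szone2ozone() overlay above works by stashing a copy of the default scalable zone and then overwriting that zone's function pointers, so allocations made before jemalloc is loaded (the comment notes Core Foundation uses the szone that early) can still be sized and freed by the original zone, while everything else is dispatched to jemalloc after an ivsalloc()-style ownership test. Below is a minimal, self-contained sketch of that ownership-dispatch pattern only, not the Darwin integration itself; every my_* and fake_* name is a hypothetical stand-in, and the stand-in zone struct models just the two function pointers the pattern needs.

	/*
	 * Sketch: the ownership-dispatch pattern behind the ozone_* wrappers.
	 * Everything prefixed my_ or fake_ is hypothetical; only the shape of
	 * the logic mirrors ozone_free().
	 */
	#include <stdlib.h>

	/* Stand-in for the subset of malloc_zone_t that the pattern needs. */
	typedef struct my_zone_s my_zone_t;
	struct my_zone_s {
		size_t	(*size)(my_zone_t *zone, void *ptr);
		void	(*free)(my_zone_t *zone, void *ptr);
	};

	/*
	 * Copy of the original zone, stashed before its pointers are replaced
	 * (the analogue of the static szone above).
	 */
	static my_zone_t my_szone;

	/*
	 * Hypothetical ownership test, analogous to ivsalloc(): nonzero iff the
	 * replacement allocator owns ptr.  Stubbed to "never ours" for the demo.
	 */
	static size_t
	my_ivsalloc(void *ptr)
	{
		(void)ptr;
		return (0);
	}

	static void
	my_ozone_free(my_zone_t *zone, void *ptr)
	{
		if (my_ivsalloc(ptr) != 0) {
			/* The replacement allocator owns ptr; free it directly. */
			free(ptr);
		} else if (my_szone.size(zone, ptr) != 0) {
			/* The original zone owns ptr; delegate to the saved copy. */
			my_szone.free(zone, ptr);
		}
		/* Pointers owned by neither zone are ignored, as in ozone_free(). */
	}

	/* Demo "original" zone backed by libc malloc. */
	static size_t
	fake_szone_size(my_zone_t *zone, void *ptr)
	{
		(void)zone; (void)ptr;
		return (1);	/* Claim ownership of everything in this demo. */
	}

	static void
	fake_szone_free(my_zone_t *zone, void *ptr)
	{
		(void)zone;
		free(ptr);
	}

	int
	main(void)
	{
		my_szone.size = fake_szone_size;
		my_szone.free = fake_szone_free;
		my_ozone_free(NULL, malloc(32));	/* Delegates to the saved zone. */
		return (0);
	}

The same check-then-delegate shape appears in ozone_size(), ozone_realloc(), and ozone_free_definite_size(); only the fallback action taken on the saved szone copy differs.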

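The other notable change in this section is the tcache_thread_cleanup() rework in tcache.c, which replaces the direct tcache_tls assignment with TCACHE_SET() and uses the sentinel values 1 ("already cleaned up, do not recreate") and 2 ("an allocator call happened after cleanup") so that a late allocation from some other thread-specific-data destructor re-arms the key and cleanup gets another pass. The sketch below illustrates that sentinel protocol with plain POSIX thread-specific data; it is a rough model of what the comments in the hunk above describe, not a reproduction of the TCACHE_GET/TCACHE_SET macros, and all my_* names are hypothetical.

	/*
	 * Sketch: the sentinel protocol described in tcache_thread_cleanup().
	 * All my_* names are hypothetical; only pthread_key_create(),
	 * pthread_getspecific(), and pthread_setspecific() are POSIX API.
	 */
	#include <pthread.h>
	#include <stdint.h>
	#include <stdlib.h>

	#define	MY_CLEANED	((void *)(uintptr_t)1)	/* Cache destroyed; do not recreate. */
	#define	MY_LATE_USE	((void *)(uintptr_t)2)	/* Allocator called after cleanup. */

	static pthread_key_t my_key;

	/* Allocation path: refuse to rebuild the cache while the thread is dying. */
	static void *
	my_cache_get(void)
	{
		void *cache = pthread_getspecific(my_key);

		if (cache == MY_CLEANED || cache == MY_LATE_USE) {
			/* Record the late use and bypass the cache entirely. */
			pthread_setspecific(my_key, MY_LATE_USE);
			return (NULL);
		}
		if (cache == NULL) {
			cache = malloc(64);	/* Stand-in for tcache_create(). */
			pthread_setspecific(my_key, cache);
		}
		return (cache);
	}

	static void
	my_cache_cleanup(void *arg)
	{
		if (arg == MY_CLEANED) {
			/*
			 * Nothing happened since the last pass: leave the value
			 * NULL so the destructor is not called again.
			 */
		} else if (arg == MY_LATE_USE) {
			/*
			 * Another destructor used the allocator after cleanup;
			 * re-arm the key so one more pass runs.
			 */
			pthread_setspecific(my_key, MY_CLEANED);
		} else {
			free(arg);		/* Stand-in for tcache_destroy(). */
			pthread_setspecific(my_key, MY_CLEANED);
		}
	}

	static void *
	my_thread(void *unused)
	{
		(void)unused;
		(void)my_cache_get();
		return (NULL);
	}

	int
	main(void)
	{
		pthread_t thd;

		pthread_key_create(&my_key, my_cache_cleanup);
		pthread_create(&thd, NULL, my_thread, NULL);
		pthread_join(thd, NULL);
		return (0);
	}

The re-arm works because POSIX repeats the destructor pass whenever a destructor leaves its key non-NULL again (implementations may stop after PTHREAD_DESTRUCTOR_ITERATIONS rounds), so setting the value back to 1 guarantees tcache_thread_cleanup() one more chance to run.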