author | Jason Evans <jasone@canonware.com> | 2015-01-31 06:54:08 (GMT)
committer | Jason Evans <jasone@canonware.com> | 2015-02-05 00:51:53 (GMT)
commit | 8d0e04d42f4750970ac3052a6c76379b60aba5dc (patch)
tree | 25d71a94a914eb4f69c524f14b5f8d28eaf01881 /src
parent | c810fcea1fa7983ef5bcabe6556cdc19dde6dd8d (diff)
Refactor rtree to be lock-free.
Recent huge allocation refactoring associates huge allocations with
arenas, but it remains necessary to quickly look up huge allocation
metadata during reallocation/deallocation. A global radix tree remains
a good solution to this problem, but locking would have become the
primary bottleneck after the (upcoming) migration of chunk management from
global to per-arena data structures.
This lock-free implementation uses double-checked reads to traverse the
tree, so that in the steady state, each read or write requires only a
single atomic operation.
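
As a concrete illustration of that pattern, here is a minimal, self-contained
C11 sketch (all names are hypothetical; the patch itself uses jemalloc's
atomic_read_p()/atomic_cas_p() wrappers and CPU_SPINWAIT, visible in the
src/rtree.c diff below). One caveat: jemalloc's fast-path read is a plain load
that relies on dependent-load ordering; the relaxed load below has the same
property, and a strictly portable variant would use an acquire load on the
fast path as well.

```c
#include <sched.h>
#include <stdatomic.h>
#include <stdlib.h>

typedef struct node_s node_t;
struct node_s {
	_Atomic(node_t *) child[16];	/* fan-out is illustrative */
};

/* Sentinel marking a slot whose initialization is in flight. */
#define NODE_INITIALIZING ((node_t *)1)

/* Slow path: exactly one thread allocates; any racing thread spins. */
static node_t *
node_init_hard(_Atomic(node_t *) *slotp)
{
	node_t *node = NULL;

	if (atomic_compare_exchange_strong(slotp, &node, NODE_INITIALIZING)) {
		/*
		 * Won the race: allocate, then publish with a release store.
		 * On allocation failure this publishes NULL, letting waiters
		 * fail cleanly (the patch instead leaves the sentinel behind,
		 * since its base_alloc() failure is effectively fatal).
		 */
		node = calloc(1, sizeof(node_t));
		atomic_store_explicit(slotp, node, memory_order_release);
	} else if (node == NODE_INITIALIZING) {
		/* Lost the race: wait for the winner to publish. */
		do {
			sched_yield();	/* stand-in for CPU_SPINWAIT */
			node = atomic_load_explicit(slotp,
			    memory_order_acquire);
		} while (node == NODE_INITIALIZING);
	}
	/* If the CAS failed with a real pointer, node already holds it. */
	return node;
}

/* Fast path: the double-checked read. */
static node_t *
node_read(_Atomic(node_t *) *slotp)
{
	node_t *node = atomic_load_explicit(slotp, memory_order_relaxed);

	if (node == NULL || node == NODE_INITIALIZING) {
		/* Re-check with an acquire load before taking the slow path. */
		node = atomic_load_explicit(slotp, memory_order_acquire);
		if (node == NULL || node == NODE_INITIALIZING)
			node = node_init_hard(slotp);
	}
	return node;
}
```

In the steady state the slot is already populated, so node_read() costs a
single load per level, which is the property the commit message claims.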
This implementation also ensures that no more than two tree levels
actually exist, through a combination of careful virtual memory
allocation, which makes large sparse nodes cheap, and skipping the root
node on x64 (possible because the top 16 bits are all 0 in practice).
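
To make the level arithmetic concrete, here is a small sketch of the
start-level computation that implements the root skipping. The 16-bit
per-level fan-out and the 2 MiB chunk size are illustrative assumptions (the
real constants live in the rtree header, which this diff does not include);
the start_level[] table is assumed filled as in rtree_new() below, i.e.
start_level[i] = min(RTREE_HEIGHT_MAX - 1 - i, height - 1).

```c
#include <stdint.h>

#define BITS_PER_LEVEL	16			/* assumed per-level fan-out */
#define HEIGHT_MAX	(64 / BITS_PER_LEVEL)	/* at most 4 levels */

typedef struct {
	unsigned height;
	unsigned start_level[HEIGHT_MAX];
} rtree_meta_t;

/*
 * With 64-bit pointers and (say) 2 MiB chunks, keys carry 64 - 21 = 43
 * significant bits, split 16/16/11 across levels, so height == 3. An
 * x64 user address has its top 16 bits clear, hence lg_floor(key) <= 47,
 * the table index is at most 47 / 16 == 2, and
 * start_level[2] == min(HEIGHT_MAX - 1 - 2, height - 1) == 1: traversal
 * begins at level 1, so the root level is never even allocated.
 */
static unsigned
start_level(const rtree_meta_t *m, uintptr_t key)
{
	if (key == 0)
		return m->height - 1;
	/* (63 - clz) == lg_floor(key); GCC/Clang intrinsic. */
	return m->start_level[(63 -
	    __builtin_clzll((unsigned long long)key)) / BITS_PER_LEVEL];
}
```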
Diffstat (limited to 'src')
-rw-r--r-- | src/chunk.c | 25
-rw-r--r-- | src/rtree.c | 138
2 files changed, 92 insertions, 71 deletions
```diff
diff --git a/src/chunk.c b/src/chunk.c
index 01180a7..9ba0b0c 100644
--- a/src/chunk.c
+++ b/src/chunk.c
@@ -21,7 +21,7 @@ static extent_tree_t	chunks_ad_mmap;
 static extent_tree_t	chunks_szad_dss;
 static extent_tree_t	chunks_ad_dss;
 
-rtree_t		*chunks_rtree;
+rtree_t		chunks_rtree;
 
 /* Various chunk-related settings. */
 size_t		chunksize;
@@ -200,7 +200,7 @@ chunk_register(void *chunk, size_t size, bool base)
 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
 
 	if (config_ivsalloc && !base) {
-		if (rtree_set(chunks_rtree, (uintptr_t)chunk, 1))
+		if (rtree_set(&chunks_rtree, (uintptr_t)chunk, chunk))
 			return (true);
 	}
 	if (config_stats || config_prof) {
@@ -395,7 +395,7 @@ chunk_dalloc_core(void *chunk, size_t size)
 	assert((size & chunksize_mask) == 0);
 
 	if (config_ivsalloc)
-		rtree_set(chunks_rtree, (uintptr_t)chunk, 0);
+		rtree_set(&chunks_rtree, (uintptr_t)chunk, NULL);
 	if (config_stats || config_prof) {
 		malloc_mutex_lock(&chunks_mtx);
 		assert(stats_chunks.curchunks >= (size / chunksize));
@@ -415,6 +415,14 @@ chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind)
 	return (false);
 }
 
+static rtree_node_elm_t *
+chunks_rtree_node_alloc(size_t nelms)
+{
+
+	return ((rtree_node_elm_t *)base_alloc(nelms *
+	    sizeof(rtree_node_elm_t)));
+}
+
 bool
 chunk_boot(void)
 {
@@ -436,9 +444,8 @@ chunk_boot(void)
 	extent_tree_szad_new(&chunks_szad_dss);
 	extent_tree_ad_new(&chunks_ad_dss);
 	if (config_ivsalloc) {
-		chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) -
-		    opt_lg_chunk, base_alloc, NULL);
-		if (chunks_rtree == NULL)
+		if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) -
+		    opt_lg_chunk, chunks_rtree_node_alloc, NULL))
 			return (true);
 	}
 
@@ -450,8 +457,6 @@ chunk_prefork(void)
 {
 
 	malloc_mutex_prefork(&chunks_mtx);
-	if (config_ivsalloc)
-		rtree_prefork(chunks_rtree);
 	chunk_dss_prefork();
 }
 
@@ -460,8 +465,6 @@ chunk_postfork_parent(void)
 {
 
 	chunk_dss_postfork_parent();
-	if (config_ivsalloc)
-		rtree_postfork_parent(chunks_rtree);
 	malloc_mutex_postfork_parent(&chunks_mtx);
 }
 
@@ -470,7 +473,5 @@ chunk_postfork_child(void)
 {
 
 	chunk_dss_postfork_child();
-	if (config_ivsalloc)
-		rtree_postfork_child(chunks_rtree);
 	malloc_mutex_postfork_child(&chunks_mtx);
 }
diff --git a/src/rtree.c b/src/rtree.c
index 2ff93db..47d9084 100644
--- a/src/rtree.c
+++ b/src/rtree.c
@@ -1,75 +1,74 @@
 #define JEMALLOC_RTREE_C_
 #include "jemalloc/internal/jemalloc_internal.h"
 
-rtree_t *
-rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc)
+static unsigned
+hmin(unsigned ha, unsigned hb)
 {
-	rtree_t *ret;
-	unsigned bits_per_level, bits_in_leaf, height, i;
+
+	return (ha < hb ? ha : hb);
+}
+
+/* Only the most significant bits of keys passed to rtree_[gs]et() are used. */
+bool
+rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
+    rtree_node_dalloc_t *dalloc)
+{
+	unsigned bits_in_leaf, height, i;
 
 	assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3));
 
-	bits_per_level = jemalloc_ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void
-	    *)))) - 1;
-	bits_in_leaf = jemalloc_ffs(pow2_ceil((RTREE_NODESIZE /
-	    sizeof(uint8_t)))) - 1;
+	bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL
+	    : (bits % RTREE_BITS_PER_LEVEL);
 	if (bits > bits_in_leaf) {
-		height = 1 + (bits - bits_in_leaf) / bits_per_level;
-		if ((height-1) * bits_per_level + bits_in_leaf != bits)
+		height = 1 + (bits - bits_in_leaf) / RTREE_BITS_PER_LEVEL;
+		if ((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf != bits)
 			height++;
-	} else {
+	} else
 		height = 1;
+	assert((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf == bits);
+
+	rtree->alloc = alloc;
+	rtree->dalloc = dalloc;
+	rtree->height = height;
+
+	/* Root level. */
+	rtree->levels[0].subtree = NULL;
+	rtree->levels[0].bits = (height > 1) ? RTREE_BITS_PER_LEVEL :
+	    bits_in_leaf;
+	rtree->levels[0].cumbits = rtree->levels[0].bits;
+	/* Interior levels. */
+	for (i = 1; i < height-1; i++) {
+		rtree->levels[i].subtree = NULL;
+		rtree->levels[i].bits = RTREE_BITS_PER_LEVEL;
+		rtree->levels[i].cumbits = rtree->levels[i-1].cumbits +
+		    RTREE_BITS_PER_LEVEL;
 	}
-	assert((height-1) * bits_per_level + bits_in_leaf >= bits);
-
-	ret = (rtree_t*)alloc(offsetof(rtree_t, level2bits) +
-	    (sizeof(unsigned) * height));
-	if (ret == NULL)
-		return (NULL);
-	memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) *
-	    height));
-
-	ret->alloc = alloc;
-	ret->dalloc = dalloc;
-	if (malloc_mutex_init(&ret->mutex)) {
-		if (dalloc != NULL)
-			dalloc(ret);
-		return (NULL);
-	}
-	ret->height = height;
+	/* Leaf level. */
 	if (height > 1) {
-		if ((height-1) * bits_per_level + bits_in_leaf > bits) {
-			ret->level2bits[0] = (bits - bits_in_leaf) %
-			    bits_per_level;
-		} else
-			ret->level2bits[0] = bits_per_level;
-		for (i = 1; i < height-1; i++)
-			ret->level2bits[i] = bits_per_level;
-		ret->level2bits[height-1] = bits_in_leaf;
-	} else
-		ret->level2bits[0] = bits;
+		rtree->levels[height-1].subtree = NULL;
+		rtree->levels[height-1].bits = bits_in_leaf;
+		rtree->levels[height-1].cumbits = bits;
+	}
 
-	ret->root = (void**)alloc(sizeof(void *) << ret->level2bits[0]);
-	if (ret->root == NULL) {
-		if (dalloc != NULL)
-			dalloc(ret);
-		return (NULL);
+	/* Compute lookup table to be used by rtree_start_level(). */
+	for (i = 0; i < RTREE_HEIGHT_MAX; i++) {
+		rtree->start_level[i] = hmin(RTREE_HEIGHT_MAX - 1 - i, height -
+		    1);
 	}
-	memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]);
 
-	return (ret);
+	return (false);
 }
 
 static void
-rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level)
+rtree_delete_subtree(rtree_t *rtree, rtree_node_elm_t *node, unsigned level)
 {
 
 	if (level < rtree->height - 1) {
 		size_t nchildren, i;
 
-		nchildren = ZU(1) << rtree->level2bits[level];
+		nchildren = ZU(1) << rtree->levels[level].bits;
 		for (i = 0; i < nchildren; i++) {
-			void **child = (void **)node[i];
+			rtree_node_elm_t *child = node[i].child;
 			if (child != NULL)
 				rtree_delete_subtree(rtree, child, level + 1);
 		}
@@ -80,28 +79,49 @@ rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level)
 void
 rtree_delete(rtree_t *rtree)
 {
+	unsigned i;
 
-	rtree_delete_subtree(rtree, rtree->root, 0);
-	rtree->dalloc(rtree);
+	for (i = 0; i < rtree->height; i++) {
+		rtree_node_elm_t *subtree = rtree->levels[i].subtree;
+		if (subtree != NULL)
+			rtree_delete_subtree(rtree, subtree, i);
+	}
 }
 
-void
-rtree_prefork(rtree_t *rtree)
+static rtree_node_elm_t *
+rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp)
 {
+	rtree_node_elm_t *node;
+
+	if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) {
+		/*
+		 * Another thread is already in the process of initializing.
+		 * Spin-wait until initialization is complete.
+		 */
+		do {
+			CPU_SPINWAIT;
+			node = atomic_read_p((void **)elmp);
+		} while (node == RTREE_NODE_INITIALIZING);
+	} else {
+		node = rtree->alloc(ZU(1) << rtree->levels[level].bits);
+		if (node == NULL)
+			return (NULL);
+		atomic_write_p((void **)elmp, node);
+	}
 
-	malloc_mutex_prefork(&rtree->mutex);
+	return (node);
 }
 
-void
-rtree_postfork_parent(rtree_t *rtree)
+rtree_node_elm_t *
+rtree_subtree_read_hard(rtree_t *rtree, unsigned level)
 {
 
-	malloc_mutex_postfork_parent(&rtree->mutex);
+	return (rtree_node_init(rtree, level, &rtree->levels[level].subtree));
 }
 
-void
-rtree_postfork_child(rtree_t *rtree)
+rtree_node_elm_t *
+rtree_child_read_hard(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
 {
 
-	malloc_mutex_postfork_child(&rtree->mutex);
+	return (rtree_node_init(rtree, level, &elm->child));
 }
```
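
Finally, a hypothetical usage sketch of the reworked API (not part of the
patch): it mirrors how chunk_boot() above wires up chunks_rtree, but with a
malloc-based node allocator so children start out NULL the way base_alloc()'d
nodes do. rtree_get() is declared in jemalloc's internal headers, which this
diff does not touch, so its exact signature here is an assumption.

```c
#include "jemalloc/internal/jemalloc_internal.h"	/* rtree_t etc. */
#include <assert.h>
#include <stdlib.h>

static rtree_node_elm_t *
node_alloc(size_t nelms)
{
	/* calloc() stands in for base_alloc(): nodes must start zeroed. */
	return ((rtree_node_elm_t *)calloc(nelms, sizeof(rtree_node_elm_t)));
}

static void
node_dalloc(rtree_node_elm_t *node)
{
	free(node);
}

static bool
rtree_example(void)
{
	rtree_t rtree;
	/* A fake 2 MiB-aligned chunk address; 64 - 21 = 43 key bits. */
	void *chunk = (void *)((uintptr_t)0x1234 << 21);

	/* rtree_new() returns false on success, as chunk_boot() expects. */
	if (rtree_new(&rtree, 43, node_alloc, node_dalloc))
		return (true);

	if (rtree_set(&rtree, (uintptr_t)chunk, chunk))
		return (true);	/* lazy node allocation failed */
	assert(rtree_get(&rtree, (uintptr_t)chunk) == chunk);

	rtree_set(&rtree, (uintptr_t)chunk, NULL);	/* deregister */
	rtree_delete(&rtree);
	return (false);
}
```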