author     Jason Evans <jasone@canonware.com>   2015-01-31 06:54:08 (GMT)
committer  Jason Evans <jasone@canonware.com>   2015-02-05 00:51:53 (GMT)
commit     8d0e04d42f4750970ac3052a6c76379b60aba5dc (patch)
tree       25d71a94a914eb4f69c524f14b5f8d28eaf01881 /include
parent     c810fcea1fa7983ef5bcabe6556cdc19dde6dd8d (diff)
Refactor rtree to be lock-free.
Recent huge allocation refactoring associates huge allocations with
arenas, but it remains necessary to quickly look up huge allocation
metadata during reallocation/deallocation. A global radix tree remains
a good solution to this problem, but locking would have become the
primary bottleneck after the (upcoming) migration of chunk management
from global to per-arena data structures.
This lock-free implementation uses double-checked reads to traverse the
tree, so that in the steady state, each read or write requires only a
single atomic operation.
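For illustration, the core of that pattern looks like the following
sketch, written against C11 atomics rather than jemalloc's internal
atomic_read_p() wrapper (the type and function names here are
hypothetical, not part of the commit):

    #include <stdatomic.h>

    typedef struct node_s node_t;
    struct node_s {
        _Atomic(node_t *) child;
    };

    /* Sentinel installed while another thread initializes a slot. */
    #define NODE_INITIALIZING ((node_t *)0x1)

    /*
     * Double-checked read: the first, relaxed load suffices in the
     * steady state; only if the slot looks uninitialized (NULL or the
     * sentinel) is it re-read with acquire semantics.
     */
    static node_t *
    child_tryread(node_t *elm)
    {
        node_t *child = atomic_load_explicit(&elm->child,
            memory_order_relaxed);

        if (child == NULL || child == NODE_INITIALIZING)
            child = atomic_load_explicit(&elm->child,
                memory_order_acquire);
        return (child);
    }

In the common case the first load observes a fully initialized child
and no further synchronization is needed; the acquire re-read is only
taken on the cold path.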
This implementation also ensures that no more than two tree levels
actually exist, through a combination of careful virtual memory
allocation that makes large sparse nodes cheap, and skipping the root
node on x64 (possible because the top 16 bits of userland addresses are
all 0 in practice).
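Concretely, with the constants introduced below (LG_RTREE_BITS_PER_LEVEL
= 4, so 16 key bits per level on a 64-bit machine, and at most 64/16 = 4
levels), each per-level subkey is a 16-bit slice of the key. A minimal
sketch of that arithmetic, with an example address (the helper name is
invented; it mirrors the shift/mask in rtree_subkey()):

    #include <assert.h>
    #include <stdint.h>

    /*
     * Extract the slice of 'bits' key bits that ends 'cumbits' bits
     * below the top of a 64-bit key.
     */
    static uintptr_t
    subkey(uintptr_t key, unsigned cumbits, unsigned bits)
    {
        return ((key >> (64 - cumbits)) & (((uintptr_t)1 << bits) - 1));
    }

    int
    main(void)
    {
        uintptr_t key = (uintptr_t)0x00007f1234abcdefULL;

        /*
         * The level covering bits 63..48 is always 0 for x64 userland
         * pointers, which is why traversal can start one level down.
         */
        assert(subkey(key, 16, 16) == 0x0000);
        assert(subkey(key, 32, 16) == 0x7f12); /* bits 47..32 */
        assert(subkey(key, 48, 16) == 0x34ab); /* bits 31..16 */
        return (0);
    }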
Diffstat (limited to 'include')
-rw-r--r--   include/jemalloc/internal/chunk.h                    2
-rw-r--r--   include/jemalloc/internal/jemalloc_internal.h.in     2
-rw-r--r--   include/jemalloc/internal/private_symbols.txt       15
-rw-r--r--   include/jemalloc/internal/rtree.h                  344
4 files changed, 235 insertions, 128 deletions
diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h
index 764b7ac..62ac3e7 100644
--- a/include/jemalloc/internal/chunk.h
+++ b/include/jemalloc/internal/chunk.h
@@ -35,7 +35,7 @@ extern malloc_mutex_t chunks_mtx;
 /* Chunk statistics. */
 extern chunk_stats_t stats_chunks;
 
-extern rtree_t *chunks_rtree;
+extern rtree_t chunks_rtree;
 
 extern size_t chunksize;
 extern size_t chunksize_mask; /* (chunksize - 1). */
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 79a23e5..280501d 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -955,7 +955,7 @@ ivsalloc(const void *ptr, bool demote)
 {
 
 	/* Return 0 if ptr is not within a chunk managed by jemalloc. */
-	if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0)
+	if (rtree_get(&chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0)
 		return (0);
 
 	return (isalloc(ptr, demote));
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index 105e664..7a78f58 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -369,14 +369,21 @@ quarantine_alloc_hook
 quarantine_alloc_hook_work
 quarantine_cleanup
 register_zone
+rtree_child_read
+rtree_child_read_hard
+rtree_child_tryread
 rtree_delete
 rtree_get
-rtree_get_locked
 rtree_new
-rtree_postfork_child
-rtree_postfork_parent
-rtree_prefork
+rtree_node_valid
 rtree_set
+rtree_start_level
+rtree_subkey
+rtree_subtree_read
+rtree_subtree_read_hard
+rtree_subtree_tryread
+rtree_val_read
+rtree_val_write
 s2u
 s2u_compute
 s2u_lookup
diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h
index bc74769..e86e17c 100644
--- a/include/jemalloc/internal/rtree.h
+++ b/include/jemalloc/internal/rtree.h
@@ -1,170 +1,270 @@
 /*
  * This radix tree implementation is tailored to the singular purpose of
- * tracking which chunks are currently owned by jemalloc.  This functionality
- * is mandatory for OS X, where jemalloc must be able to respond to object
- * ownership queries.
+ * associating metadata with chunks that are currently owned by jemalloc.
  *
  *******************************************************************************
 
 */
 #ifdef JEMALLOC_H_TYPES
 
+typedef struct rtree_node_elm_s rtree_node_elm_t;
+typedef struct rtree_level_s rtree_level_t;
 typedef struct rtree_s rtree_t;
 
 /*
- * Size of each radix tree node (must be a power of 2).  This impacts tree
- * depth.
+ * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the
+ * machine address width.
  */
-#define RTREE_NODESIZE (1U << 16)
+#define LG_RTREE_BITS_PER_LEVEL 4
+#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL)
+#define RTREE_HEIGHT_MAX \
+    ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
 
-typedef void *(rtree_alloc_t)(size_t);
-typedef void (rtree_dalloc_t)(void *);
+/* Used for two-stage lock-free node initialization. */
+#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1)
+
+/*
+ * The node allocation callback function's argument is the number of contiguous
+ * rtree_node_elm_t structures to allocate, and the resulting memory must be
+ * zeroed.
+ */
+typedef rtree_node_elm_t *(rtree_node_alloc_t)(size_t);
+typedef void (rtree_node_dalloc_t)(rtree_node_elm_t *);
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
+struct rtree_node_elm_s {
+    union {
+        rtree_node_elm_t *child;
+        void *val;
+    };
+};
+
+struct rtree_level_s {
+    /*
+     * A non-NULL subtree points to a subtree rooted along the hypothetical
+     * path to the leaf node corresponding to key 0.  Depending on what keys
+     * have been used to store to the tree, an arbitrary combination of
+     * subtree pointers may remain NULL.
+     *
+     * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4.
+     * This results in a 3-level tree, and the leftmost leaf can be directly
+     * accessed via subtrees[2], the subtree prefixed by 0x0000 (excluding
+     * 0x00000000) can be accessed via subtrees[1], and the remainder of the
+     * tree can be accessed via subtrees[0].
+     *
+     *   levels[0] : [<unused> | 0x0001******** | 0x0002******** | ...]
+     *
+     *   levels[1] : [<unused> | 0x00000001**** | 0x00000002**** | ... ]
+     *
+     *   levels[2] : [val(0x000000000000) | val(0x000000000001) | ...]
+     *
+     * This has practical implications on x64, which currently uses only the
+     * lower 47 bits of virtual address space in userland, thus leaving
+     * subtrees[0] unused and avoiding a level of tree traversal.
+     */
+    rtree_node_elm_t *subtree;
+    /* Number of key bits distinguished by this level. */
+    unsigned bits;
+    /*
+     * Cumulative number of key bits distinguished by traversing to
+     * corresponding tree level.
+     */
+    unsigned cumbits;
+};
+
 struct rtree_s {
-    rtree_alloc_t *alloc;
-    rtree_dalloc_t *dalloc;
-    malloc_mutex_t mutex;
-    void **root;
-    unsigned height;
-    unsigned level2bits[1]; /* Dynamically sized. */
+    rtree_node_alloc_t *alloc;
+    rtree_node_dalloc_t *dalloc;
+    unsigned height;
+    /*
+     * Precomputed table used to convert from the number of leading 0 key
+     * bits to which subtree level to start at.
+     */
+    unsigned start_level[RTREE_HEIGHT_MAX];
+    rtree_level_t levels[RTREE_HEIGHT_MAX];
 };
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-rtree_t *rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc);
+bool rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
+    rtree_node_dalloc_t *dalloc);
 void rtree_delete(rtree_t *rtree);
-void rtree_prefork(rtree_t *rtree);
-void rtree_postfork_parent(rtree_t *rtree);
-void rtree_postfork_child(rtree_t *rtree);
+rtree_node_elm_t *rtree_subtree_read_hard(rtree_t *rtree,
+    unsigned level);
+rtree_node_elm_t *rtree_child_read_hard(rtree_t *rtree,
+    rtree_node_elm_t *elm, unsigned level);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
-#ifdef JEMALLOC_DEBUG
-uint8_t rtree_get_locked(rtree_t *rtree, uintptr_t key);
-#endif
-uint8_t rtree_get(rtree_t *rtree, uintptr_t key);
-bool rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val);
+unsigned rtree_start_level(rtree_t *rtree, uintptr_t key);
+uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
+
+bool rtree_node_valid(rtree_node_elm_t *node);
+rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm);
+rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
+    unsigned level);
+void *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm);
+void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, void *val);
+rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level);
+rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level);
+
+void *rtree_get(rtree_t *rtree, uintptr_t key);
+bool rtree_set(rtree_t *rtree, uintptr_t key, void *val);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
-#define RTREE_GET_GENERATE(f) \
-/* The least significant bits of the key are ignored. */ \
-JEMALLOC_INLINE uint8_t \
-f(rtree_t *rtree, uintptr_t key) \
-{ \
-    uint8_t ret; \
-    uintptr_t subkey; \
-    unsigned i, lshift, height, bits; \
-    void **node, **child; \
- \
-    RTREE_LOCK(&rtree->mutex); \
-    for (i = lshift = 0, height = rtree->height, node = rtree->root;\
-        i < height - 1; \
-        i++, lshift += bits, node = child) { \
-        bits = rtree->level2bits[i]; \
-        subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \
-            3)) - bits); \
-        child = (void**)node[subkey]; \
-        if (child == NULL) { \
-            RTREE_UNLOCK(&rtree->mutex); \
-            return (0); \
-        } \
-    } \
- \
-    /* \
-     * node is a leaf, so it contains values rather than node \
-     * pointers. \
-     */ \
-    bits = rtree->level2bits[i]; \
-    subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \
-        bits); \
-    { \
-        uint8_t *leaf = (uint8_t *)node; \
-        ret = leaf[subkey]; \
-    } \
-    RTREE_UNLOCK(&rtree->mutex); \
- \
-    RTREE_GET_VALIDATE \
-    return (ret); \
+JEMALLOC_INLINE unsigned
+rtree_start_level(rtree_t *rtree, uintptr_t key)
+{
+    unsigned start_level;
+
+    if (unlikely(key == 0))
+        return (rtree->height - 1);
+
+    start_level = rtree->start_level[lg_floor(key) >>
+        LG_RTREE_BITS_PER_LEVEL];
+    assert(start_level < rtree->height);
+    return (start_level);
 }
 
-#ifdef JEMALLOC_DEBUG
-#  define RTREE_LOCK(l) malloc_mutex_lock(l)
-#  define RTREE_UNLOCK(l) malloc_mutex_unlock(l)
-#  define RTREE_GET_VALIDATE
-RTREE_GET_GENERATE(rtree_get_locked)
-#  undef RTREE_LOCK
-#  undef RTREE_UNLOCK
-#  undef RTREE_GET_VALIDATE
-#endif
+JEMALLOC_INLINE uintptr_t
+rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level)
+{
 
-#define RTREE_LOCK(l)
-#define RTREE_UNLOCK(l)
-#ifdef JEMALLOC_DEBUG
-    /*
-     * Suppose that it were possible for a jemalloc-allocated chunk to be
-     * munmap()ped, followed by a different allocator in another thread re-using
-     * overlapping virtual memory, all without invalidating the cached rtree
-     * value.  The result would be a false positive (the rtree would claim that
-     * jemalloc owns memory that it had actually discarded).  This scenario
-     * seems impossible, but the following assertion is a prudent sanity check.
-     */
-#  define RTREE_GET_VALIDATE \
-    assert(rtree_get_locked(rtree, key) == ret);
-#else
-#  define RTREE_GET_VALIDATE
-#endif
-RTREE_GET_GENERATE(rtree_get)
-#undef RTREE_LOCK
-#undef RTREE_UNLOCK
-#undef RTREE_GET_VALIDATE
+    return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
+        rtree->levels[level].cumbits)) & ((ZU(1) <<
+        rtree->levels[level].bits) - 1));
+}
 
 JEMALLOC_INLINE bool
-rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val)
+rtree_node_valid(rtree_node_elm_t *node)
+{
+
+    return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_child_tryread(rtree_node_elm_t *elm)
+{
+    rtree_node_elm_t *child;
+
+    /* Double-checked read (first read may be stale). */
+    child = elm->child;
+    if (!rtree_node_valid(child))
+        child = atomic_read_p((void **)&elm->child);
+    return (child);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
+{
+    rtree_node_elm_t *child;
+
+    child = rtree_child_tryread(elm);
+    if (unlikely(!rtree_node_valid(child)))
+        child = rtree_child_read_hard(rtree, elm, level);
+    return (child);
+}
+
+JEMALLOC_INLINE void *
+rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm)
+{
+
+    return (atomic_read_p(&elm->val));
+}
+
+JEMALLOC_INLINE void
+rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, void *val)
+{
+
+    atomic_write_p(&elm->val, val);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_subtree_tryread(rtree_t *rtree, unsigned level)
+{
+    rtree_node_elm_t *subtree;
+
+    /* Double-checked read (first read may be stale). */
+    subtree = rtree->levels[level].subtree;
+    if (!rtree_node_valid(subtree))
+        subtree = atomic_read_p((void **)&rtree->levels[level].subtree);
+    return (subtree);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_subtree_read(rtree_t *rtree, unsigned level)
+{
+    rtree_node_elm_t *subtree;
+
+    subtree = rtree_subtree_tryread(rtree, level);
+    if (unlikely(!rtree_node_valid(subtree)))
+        subtree = rtree_subtree_read_hard(rtree, level);
+    return (subtree);
+}
+
+JEMALLOC_INLINE void *
+rtree_get(rtree_t *rtree, uintptr_t key)
 {
     uintptr_t subkey;
-    unsigned i, lshift, height, bits;
-    void **node, **child;
-
-    malloc_mutex_lock(&rtree->mutex);
-    for (i = lshift = 0, height = rtree->height, node = rtree->root;
-        i < height - 1;
-        i++, lshift += bits, node = child) {
-        bits = rtree->level2bits[i];
-        subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
-            bits);
-        child = (void**)node[subkey];
-        if (child == NULL) {
-            size_t size = ((i + 1 < height - 1) ? sizeof(void *)
-                : (sizeof(uint8_t))) << rtree->level2bits[i+1];
-            child = (void**)rtree->alloc(size);
-            if (child == NULL) {
-                malloc_mutex_unlock(&rtree->mutex);
-                return (true);
-            }
-            memset(child, 0, size);
-            node[subkey] = child;
+    unsigned i, start_level;
+    rtree_node_elm_t *node, *child;
+
+    start_level = rtree_start_level(rtree, key);
+
+    for (i = start_level, node = rtree_subtree_tryread(rtree, start_level);
+        /**/; i++, node = child) {
+        if (unlikely(!rtree_node_valid(node)))
+            return (NULL);
+        subkey = rtree_subkey(rtree, key, i);
+        if (i == rtree->height - 1) {
+            /*
+             * node is a leaf, so it contains values rather than
+             * child pointers.
+             */
+            return (rtree_val_read(rtree, &node[subkey]));
         }
+        assert(i < rtree->height - 1);
+        child = rtree_child_tryread(&node[subkey]);
     }
+    not_reached();
+}
 
-    /* node is a leaf, so it contains values rather than node pointers. */
-    bits = rtree->level2bits[i];
-    subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits);
-    {
-        uint8_t *leaf = (uint8_t *)node;
-        leaf[subkey] = val;
-    }
-    malloc_mutex_unlock(&rtree->mutex);
+JEMALLOC_INLINE bool
+rtree_set(rtree_t *rtree, uintptr_t key, void *val)
+{
+    uintptr_t subkey;
+    unsigned i, start_level;
+    rtree_node_elm_t *node, *child;
 
-    return (false);
+    start_level = rtree_start_level(rtree, key);
+
+    node = rtree_subtree_read(rtree, start_level);
+    if (node == NULL)
+        return (true);
+    for (i = start_level; /**/; i++, node = child) {
+        subkey = rtree_subkey(rtree, key, i);
+        if (i == rtree->height - 1) {
+            /*
+             * node is a leaf, so it contains values rather than
+             * child pointers.
+             */
+            rtree_val_write(rtree, &node[subkey], val);
+            return (false);
+        }
+        assert(i < rtree->height - 1);
+        child = rtree_child_read(rtree, &node[subkey], i);
+        if (child == NULL)
+            return (true);
+    }
+    not_reached();
 }
 #endif
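To make the interface change concrete, here is a hedged sketch of how a
caller might drive the revised API; the callback definitions and the bit
count are invented for the example, and the code assumes jemalloc's
internal headers for rtree_t and rtree_node_elm_t (within jemalloc
itself, chunks_rtree is wired up with internal node allocation):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdlib.h>

    /*
     * Per the header comment, the alloc callback receives a count of
     * contiguous rtree_node_elm_t structures and must return zeroed
     * memory.
     */
    static rtree_node_elm_t *
    node_alloc(size_t nelms)
    {
        return ((rtree_node_elm_t *)calloc(nelms,
            sizeof(rtree_node_elm_t)));
    }

    static void
    node_dalloc(rtree_node_elm_t *node)
    {
        free(node);
    }

    /*
     * rtree_new() now initializes caller-provided storage (returning
     * true on error) instead of allocating and returning an rtree_t.
     */
    static rtree_t example_rtree;

    static bool
    example(void *chunk, void *metadata)
    {
        /*
         * 42 = 64 address bits minus lg of an assumed 4 MiB chunk
         * size, so intra-chunk address bits are not distinguished.
         */
        if (rtree_new(&example_rtree, 42, node_alloc, node_dalloc))
            return (true); /* Initialization failed. */
        if (rtree_set(&example_rtree, (uintptr_t)chunk, metadata))
            return (true); /* Node allocation failed on write path. */
        return (rtree_get(&example_rtree, (uintptr_t)chunk) != metadata);
    }

Note how both failure paths surface as a bool return, matching the new
rtree_new()/rtree_set() signatures in the diff above.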