author     Jason Evans <jasone@canonware.com>  2015-01-31 06:54:08 (GMT)
committer  Jason Evans <jasone@canonware.com>  2015-02-05 00:51:53 (GMT)
commit     8d0e04d42f4750970ac3052a6c76379b60aba5dc
tree       25d71a94a914eb4f69c524f14b5f8d28eaf01881
parent     c810fcea1fa7983ef5bcabe6556cdc19dde6dd8d
Refactor rtree to be lock-free.
Recent huge allocation refactoring associates huge allocations with arenas, but it remains necessary to quickly look up huge allocation metadata during reallocation/deallocation. A global radix tree remains a good solution to this problem, but locking would have become the primary bottleneck after the (upcoming) migration of chunk management from global to per-arena data structures.

This lock-free implementation uses double-checked reads to traverse the tree, so that in the steady state each read or write requires only a single atomic operation.

This implementation also ensures that no more than two tree levels actually exist, through a combination of careful virtual memory allocation which makes large sparse nodes cheap, and skipping the root node on x64 (possible because the top 16 bits are all 0 in practice).
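The double-checked read pattern is the core of the change. Below is a minimal sketch (not part of the commit) using C11 atomics in place of jemalloc's internal atomic_read_p()/atomic_write_p() wrappers, and omitting the RTREE_NODE_INITIALIZING sentinel for brevity; node_init_hard() is a hypothetical slow path that allocates, zeroes, and atomically publishes a node:

#include <stdatomic.h>
#include <stddef.h>

typedef struct node_s node_t;

/* Hypothetical slow path: allocate, zero, and atomically publish a node. */
node_t	*node_init_hard(_Atomic(node_t *) *slot);

static node_t *
node_read(_Atomic(node_t *) *slot)
{
	node_t *node;

	/*
	 * First read may be stale; in the steady state this single relaxed
	 * load is the only atomic operation required.
	 */
	node = atomic_load_explicit(slot, memory_order_relaxed);
	if (node == NULL) {
		/*
		 * Double-check with an acquire load before falling back to
		 * the initialization slow path.
		 */
		node = atomic_load_explicit(slot, memory_order_acquire);
		if (node == NULL)
			node = node_init_hard(slot);
	}
	return (node);
}

rtree_child_tryread() and rtree_subtree_tryread() in the diff below follow the same shape, except that they return the possibly-invalid pointer and leave the slow path to rtree_child_read()/rtree_subtree_read().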
Diffstat (limited to 'include')
-rw-r--r--  include/jemalloc/internal/chunk.h                    2
-rw-r--r--  include/jemalloc/internal/jemalloc_internal.h.in     2
-rw-r--r--  include/jemalloc/internal/private_symbols.txt       15
-rw-r--r--  include/jemalloc/internal/rtree.h                  344
4 files changed, 235 insertions, 128 deletions
diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h
index 764b7ac..62ac3e7 100644
--- a/include/jemalloc/internal/chunk.h
+++ b/include/jemalloc/internal/chunk.h
@@ -35,7 +35,7 @@ extern malloc_mutex_t chunks_mtx;
/* Chunk statistics. */
extern chunk_stats_t stats_chunks;
-extern rtree_t *chunks_rtree;
+extern rtree_t chunks_rtree;
extern size_t chunksize;
extern size_t chunksize_mask; /* (chunksize - 1). */
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 79a23e5..280501d 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -955,7 +955,7 @@ ivsalloc(const void *ptr, bool demote)
{
/* Return 0 if ptr is not within a chunk managed by jemalloc. */
- if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0)
+ if (rtree_get(&chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0)
return (0);
return (isalloc(ptr, demote));
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index 105e664..7a78f58 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -369,14 +369,21 @@ quarantine_alloc_hook
quarantine_alloc_hook_work
quarantine_cleanup
register_zone
+rtree_child_read
+rtree_child_read_hard
+rtree_child_tryread
rtree_delete
rtree_get
-rtree_get_locked
rtree_new
-rtree_postfork_child
-rtree_postfork_parent
-rtree_prefork
+rtree_node_valid
rtree_set
+rtree_start_level
+rtree_subkey
+rtree_subtree_read
+rtree_subtree_read_hard
+rtree_subtree_tryread
+rtree_val_read
+rtree_val_write
s2u
s2u_compute
s2u_lookup
diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h
index bc74769..e86e17c 100644
--- a/include/jemalloc/internal/rtree.h
+++ b/include/jemalloc/internal/rtree.h
@@ -1,170 +1,270 @@
/*
* This radix tree implementation is tailored to the singular purpose of
- * tracking which chunks are currently owned by jemalloc. This functionality
- * is mandatory for OS X, where jemalloc must be able to respond to object
- * ownership queries.
+ * associating metadata with chunks that are currently owned by jemalloc.
*
*******************************************************************************
*/
#ifdef JEMALLOC_H_TYPES
+typedef struct rtree_node_elm_s rtree_node_elm_t;
+typedef struct rtree_level_s rtree_level_t;
typedef struct rtree_s rtree_t;
/*
- * Size of each radix tree node (must be a power of 2). This impacts tree
- * depth.
+ * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the
+ * machine address width.
*/
-#define RTREE_NODESIZE (1U << 16)
+#define LG_RTREE_BITS_PER_LEVEL 4
+#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL)
+#define RTREE_HEIGHT_MAX \
+ ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
-typedef void *(rtree_alloc_t)(size_t);
-typedef void (rtree_dalloc_t)(void *);
+/* Used for two-stage lock-free node initialization. */
+#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1)
+
+/*
+ * The node allocation callback function's argument is the number of contiguous
+ * rtree_node_elm_t structures to allocate, and the resulting memory must be
+ * zeroed.
+ */
+typedef rtree_node_elm_t *(rtree_node_alloc_t)(size_t);
+typedef void (rtree_node_dalloc_t)(rtree_node_elm_t *);
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
+struct rtree_node_elm_s {
+ union {
+ rtree_node_elm_t *child;
+ void *val;
+ };
+};
+
+struct rtree_level_s {
+ /*
+ * A non-NULL subtree points to a subtree rooted along the hypothetical
+ * path to the leaf node corresponding to key 0. Depending on what keys
+ * have been used to store to the tree, an arbitrary combination of
+ * subtree pointers may remain NULL.
+ *
+ * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4.
+ * This results in a 3-level tree, and the leftmost leaf can be directly
+ * accessed via subtrees[2], the subtree prefixed by 0x0000 (excluding
+ * 0x00000000) can be accessed via subtrees[1], and the remainder of the
+ * tree can be accessed via subtrees[0].
+ *
+ * levels[0] : [<unused> | 0x0001******** | 0x0002******** | ...]
+ *
+ * levels[1] : [<unused> | 0x00000001**** | 0x00000002**** | ... ]
+ *
+ * levels[2] : [val(0x000000000000) | val(0x000000000001) | ...]
+ *
+ * This has practical implications on x64, which currently uses only the
+ * lower 47 bits of virtual address space in userland, thus leaving
+ * subtrees[0] unused and avoiding a level of tree traversal.
+ */
+ rtree_node_elm_t *subtree;
+ /* Number of key bits distinguished by this level. */
+ unsigned bits;
+ /*
+ * Cumulative number of key bits distinguished by traversing to
+ * corresponding tree level.
+ */
+ unsigned cumbits;
+};
+
struct rtree_s {
- rtree_alloc_t *alloc;
- rtree_dalloc_t *dalloc;
- malloc_mutex_t mutex;
- void **root;
- unsigned height;
- unsigned level2bits[1]; /* Dynamically sized. */
+ rtree_node_alloc_t *alloc;
+ rtree_node_dalloc_t *dalloc;
+ unsigned height;
+ /*
+ * Precomputed table used to convert from the number of leading 0 key
+ * bits to which subtree level to start at.
+ */
+ unsigned start_level[RTREE_HEIGHT_MAX];
+ rtree_level_t levels[RTREE_HEIGHT_MAX];
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-rtree_t *rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc);
+bool rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
+ rtree_node_dalloc_t *dalloc);
void rtree_delete(rtree_t *rtree);
-void rtree_prefork(rtree_t *rtree);
-void rtree_postfork_parent(rtree_t *rtree);
-void rtree_postfork_child(rtree_t *rtree);
+rtree_node_elm_t *rtree_subtree_read_hard(rtree_t *rtree,
+ unsigned level);
+rtree_node_elm_t *rtree_child_read_hard(rtree_t *rtree,
+ rtree_node_elm_t *elm, unsigned level);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
-#ifdef JEMALLOC_DEBUG
-uint8_t rtree_get_locked(rtree_t *rtree, uintptr_t key);
-#endif
-uint8_t rtree_get(rtree_t *rtree, uintptr_t key);
-bool rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val);
+unsigned rtree_start_level(rtree_t *rtree, uintptr_t key);
+uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
+
+bool rtree_node_valid(rtree_node_elm_t *node);
+rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm);
+rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
+ unsigned level);
+void *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm);
+void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, void *val);
+rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level);
+rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level);
+
+void *rtree_get(rtree_t *rtree, uintptr_t key);
+bool rtree_set(rtree_t *rtree, uintptr_t key, void *val);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
-#define RTREE_GET_GENERATE(f) \
-/* The least significant bits of the key are ignored. */ \
-JEMALLOC_INLINE uint8_t \
-f(rtree_t *rtree, uintptr_t key) \
-{ \
- uint8_t ret; \
- uintptr_t subkey; \
- unsigned i, lshift, height, bits; \
- void **node, **child; \
- \
- RTREE_LOCK(&rtree->mutex); \
- for (i = lshift = 0, height = rtree->height, node = rtree->root;\
- i < height - 1; \
- i++, lshift += bits, node = child) { \
- bits = rtree->level2bits[i]; \
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \
- 3)) - bits); \
- child = (void**)node[subkey]; \
- if (child == NULL) { \
- RTREE_UNLOCK(&rtree->mutex); \
- return (0); \
- } \
- } \
- \
- /* \
- * node is a leaf, so it contains values rather than node \
- * pointers. \
- */ \
- bits = rtree->level2bits[i]; \
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \
- bits); \
- { \
- uint8_t *leaf = (uint8_t *)node; \
- ret = leaf[subkey]; \
- } \
- RTREE_UNLOCK(&rtree->mutex); \
- \
- RTREE_GET_VALIDATE \
- return (ret); \
+JEMALLOC_INLINE unsigned
+rtree_start_level(rtree_t *rtree, uintptr_t key)
+{
+ unsigned start_level;
+
+ if (unlikely(key == 0))
+ return (rtree->height - 1);
+
+ start_level = rtree->start_level[lg_floor(key) >>
+ LG_RTREE_BITS_PER_LEVEL];
+ assert(start_level < rtree->height);
+ return (start_level);
}
-#ifdef JEMALLOC_DEBUG
-# define RTREE_LOCK(l) malloc_mutex_lock(l)
-# define RTREE_UNLOCK(l) malloc_mutex_unlock(l)
-# define RTREE_GET_VALIDATE
-RTREE_GET_GENERATE(rtree_get_locked)
-# undef RTREE_LOCK
-# undef RTREE_UNLOCK
-# undef RTREE_GET_VALIDATE
-#endif
+JEMALLOC_INLINE uintptr_t
+rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level)
+{
-#define RTREE_LOCK(l)
-#define RTREE_UNLOCK(l)
-#ifdef JEMALLOC_DEBUG
- /*
- * Suppose that it were possible for a jemalloc-allocated chunk to be
- * munmap()ped, followed by a different allocator in another thread re-using
- * overlapping virtual memory, all without invalidating the cached rtree
- * value. The result would be a false positive (the rtree would claim that
- * jemalloc owns memory that it had actually discarded). This scenario
- * seems impossible, but the following assertion is a prudent sanity check.
- */
-# define RTREE_GET_VALIDATE \
- assert(rtree_get_locked(rtree, key) == ret);
-#else
-# define RTREE_GET_VALIDATE
-#endif
-RTREE_GET_GENERATE(rtree_get)
-#undef RTREE_LOCK
-#undef RTREE_UNLOCK
-#undef RTREE_GET_VALIDATE
+ return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
+ rtree->levels[level].cumbits)) & ((ZU(1) <<
+ rtree->levels[level].bits) - 1));
+}
JEMALLOC_INLINE bool
-rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val)
+rtree_node_valid(rtree_node_elm_t *node)
+{
+
+ return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_child_tryread(rtree_node_elm_t *elm)
+{
+ rtree_node_elm_t *child;
+
+	/* Double-checked read (first read may be stale). */
+ child = elm->child;
+ if (!rtree_node_valid(child))
+ child = atomic_read_p((void **)&elm->child);
+ return (child);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
+{
+ rtree_node_elm_t *child;
+
+ child = rtree_child_tryread(elm);
+ if (unlikely(!rtree_node_valid(child)))
+ child = rtree_child_read_hard(rtree, elm, level);
+ return (child);
+}
+
+JEMALLOC_INLINE void *
+rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm)
+{
+
+ return (atomic_read_p(&elm->val));
+}
+
+JEMALLOC_INLINE void
+rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, void *val)
+{
+
+ atomic_write_p(&elm->val, val);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_subtree_tryread(rtree_t *rtree, unsigned level)
+{
+ rtree_node_elm_t *subtree;
+
+	/* Double-checked read (first read may be stale). */
+ subtree = rtree->levels[level].subtree;
+ if (!rtree_node_valid(subtree))
+ subtree = atomic_read_p((void **)&rtree->levels[level].subtree);
+ return (subtree);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_subtree_read(rtree_t *rtree, unsigned level)
+{
+ rtree_node_elm_t *subtree;
+
+ subtree = rtree_subtree_tryread(rtree, level);
+ if (unlikely(!rtree_node_valid(subtree)))
+ subtree = rtree_subtree_read_hard(rtree, level);
+ return (subtree);
+}
+
+JEMALLOC_INLINE void *
+rtree_get(rtree_t *rtree, uintptr_t key)
{
uintptr_t subkey;
- unsigned i, lshift, height, bits;
- void **node, **child;
-
- malloc_mutex_lock(&rtree->mutex);
- for (i = lshift = 0, height = rtree->height, node = rtree->root;
- i < height - 1;
- i++, lshift += bits, node = child) {
- bits = rtree->level2bits[i];
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
- bits);
- child = (void**)node[subkey];
- if (child == NULL) {
- size_t size = ((i + 1 < height - 1) ? sizeof(void *)
- : (sizeof(uint8_t))) << rtree->level2bits[i+1];
- child = (void**)rtree->alloc(size);
- if (child == NULL) {
- malloc_mutex_unlock(&rtree->mutex);
- return (true);
- }
- memset(child, 0, size);
- node[subkey] = child;
+ unsigned i, start_level;
+ rtree_node_elm_t *node, *child;
+
+ start_level = rtree_start_level(rtree, key);
+
+ for (i = start_level, node = rtree_subtree_tryread(rtree, start_level);
+ /**/; i++, node = child) {
+ if (unlikely(!rtree_node_valid(node)))
+ return (NULL);
+ subkey = rtree_subkey(rtree, key, i);
+ if (i == rtree->height - 1) {
+ /*
+ * node is a leaf, so it contains values rather than
+ * child pointers.
+ */
+ return (rtree_val_read(rtree, &node[subkey]));
}
+ assert(i < rtree->height - 1);
+ child = rtree_child_tryread(&node[subkey]);
}
+ not_reached();
+}
- /* node is a leaf, so it contains values rather than node pointers. */
- bits = rtree->level2bits[i];
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits);
- {
- uint8_t *leaf = (uint8_t *)node;
- leaf[subkey] = val;
- }
- malloc_mutex_unlock(&rtree->mutex);
+JEMALLOC_INLINE bool
+rtree_set(rtree_t *rtree, uintptr_t key, void *val)
+{
+ uintptr_t subkey;
+ unsigned i, start_level;
+ rtree_node_elm_t *node, *child;
- return (false);
+ start_level = rtree_start_level(rtree, key);
+
+ node = rtree_subtree_read(rtree, start_level);
+ if (node == NULL)
+ return (true);
+ for (i = start_level; /**/; i++, node = child) {
+ subkey = rtree_subkey(rtree, key, i);
+ if (i == rtree->height - 1) {
+ /*
+ * node is a leaf, so it contains values rather than
+ * child pointers.
+ */
+ rtree_val_write(rtree, &node[subkey], val);
+ return (false);
+ }
+ assert(i < rtree->height - 1);
+ child = rtree_child_read(rtree, &node[subkey], i);
+ if (child == NULL)
+ return (true);
+ }
+ not_reached();
}
#endif
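For reference, here is a minimal usage sketch of the API introduced above. It is not part of the commit: node_alloc()/node_dalloc() are hypothetical callbacks (per the header comment, the allocator must return zeroed memory, which calloc() satisfies), the error-handling comments assume the usual jemalloc convention of returning true on failure, and keying on the full pointer width just keeps the sketch simple; the actual chunks_rtree presumably uses fewer key bits, since the low bits of a chunk-aligned address carry no information.

/* Assumes the jemalloc internal headers are in scope. */
#include <assert.h>
#include <stdlib.h>

static rtree_node_elm_t *
node_alloc(size_t nelms)
{

	/* calloc() satisfies the zeroed-memory requirement. */
	return ((rtree_node_elm_t *)calloc(nelms, sizeof(rtree_node_elm_t)));
}

static void
node_dalloc(rtree_node_elm_t *node)
{

	free(node);
}

static bool
rtree_example(void)
{
	rtree_t rtree;
	void *meta = (void *)0x2a;	/* Arbitrary non-NULL metadata. */

	/* Key on the full pointer width; rtree_new() returns true on error. */
	if (rtree_new(&rtree, sizeof(uintptr_t) << 3, node_alloc, node_dalloc))
		return (true);
	/* rtree_set() returns true if node allocation failed. */
	if (rtree_set(&rtree, (uintptr_t)0x200000, meta))
		return (true);
	assert(rtree_get(&rtree, (uintptr_t)0x200000) == meta);
	rtree_delete(&rtree);
	return (false);
}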