From 44f9bd147a3df96e42adbe7ad4f0195763105bbe Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 23 May 2017 14:26:31 -0700 Subject: Header refactoring: unify and de-catchall rtree module. --- include/jemalloc/internal/arena_inlines_b.h | 1 + include/jemalloc/internal/extent_externs.h | 1 + .../jemalloc/internal/jemalloc_internal_includes.h | 4 - .../internal/jemalloc_internal_inlines_b.h | 2 + include/jemalloc/internal/rtree.h | 474 +++++++++++++++++++++ include/jemalloc/internal/rtree_ctx.h | 22 - include/jemalloc/internal/rtree_externs.h | 45 -- include/jemalloc/internal/rtree_inlines.h | 350 --------------- include/jemalloc/internal/rtree_structs.h | 53 --- include/jemalloc/internal/rtree_tsd.h | 50 +++ include/jemalloc/internal/rtree_types.h | 69 --- include/jemalloc/internal/tsd.h | 2 +- src/arena.c | 1 + src/extent.c | 1 + src/jemalloc.c | 1 + src/large.c | 1 + src/tsd.c | 1 + test/unit/arena_reset.c | 2 + test/unit/rtree.c | 2 + test/unit/spin.c | 2 + 20 files changed, 540 insertions(+), 544 deletions(-) create mode 100644 include/jemalloc/internal/rtree.h delete mode 100644 include/jemalloc/internal/rtree_ctx.h delete mode 100644 include/jemalloc/internal/rtree_externs.h delete mode 100644 include/jemalloc/internal/rtree_inlines.h delete mode 100644 include/jemalloc/internal/rtree_structs.h create mode 100644 include/jemalloc/internal/rtree_tsd.h delete mode 100644 include/jemalloc/internal/rtree_types.h diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 8db6e9a..16635c1 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/ticker.h" diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 9d5daf5..96a7112 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/rb.h" +#include "jemalloc/internal/rtree.h" extern rtree_t extents_rtree; extern const extent_hooks_t extent_hooks_default; diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index b1a6f17..770bcaa 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -44,7 +44,6 @@ #include "jemalloc/internal/extent_dss_types.h" #include "jemalloc/internal/base_types.h" #include "jemalloc/internal/arena_types.h" -#include "jemalloc/internal/rtree_types.h" #include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/prof_types.h" @@ -59,7 +58,6 @@ #include "jemalloc/internal/base_structs.h" #include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/arena_structs_b.h" -#include "jemalloc/internal/rtree_structs.h" #include "jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/background_thread_structs.h" @@ -73,7 +71,6 @@ #include "jemalloc/internal/extent_mmap_externs.h" #include "jemalloc/internal/base_externs.h" #include "jemalloc/internal/arena_externs.h" -#include "jemalloc/internal/rtree_externs.h" #include "jemalloc/internal/large_externs.h" #include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/prof_externs.h" @@ -85,7 +82,6 @@ #include "jemalloc/internal/mutex_pool_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include "jemalloc/internal/rtree_inlines.h" #include "jemalloc/internal/base_inlines.h" /* * Include portions of arena code interleaved with tcache code in order to diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index cfc5209..3749316 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_INLINES_B_H #define JEMALLOC_INTERNAL_INLINES_B_H +#include "jemalloc/internal/rtree.h" + /* Choose an arena based on a per-thread value. */ static inline arena_t * arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h new file mode 100644 index 0000000..b5d4db3 --- /dev/null +++ b/include/jemalloc/internal/rtree.h @@ -0,0 +1,474 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_H +#define JEMALLOC_INTERNAL_RTREE_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree_tsd.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/tsd.h" + +/* + * This radix tree implementation is tailored to the singular purpose of + * associating metadata with extents that are currently owned by jemalloc. + * + ******************************************************************************* + */ + +/* Number of high insignificant bits. */ +#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR+3)) - LG_VADDR) +/* Number of low insigificant bits. */ +#define RTREE_NLIB LG_PAGE +/* Number of significant bits. */ +#define RTREE_NSB (LG_VADDR - RTREE_NLIB) +/* Number of levels in radix tree. */ +#if RTREE_NSB <= 10 +# define RTREE_HEIGHT 1 +#elif RTREE_NSB <= 36 +# define RTREE_HEIGHT 2 +#elif RTREE_NSB <= 52 +# define RTREE_HEIGHT 3 +#else +# error Unsupported number of significant virtual address bits +#endif +/* Use compact leaf representation if virtual address encoding allows. */ +#if RTREE_NHIB >= LG_CEIL_NSIZES +# define RTREE_LEAF_COMPACT +#endif + +/* Needed for initialization only. */ +#define RTREE_LEAFKEY_INVALID ((uintptr_t)1) + +typedef struct rtree_node_elm_s rtree_node_elm_t; +struct rtree_node_elm_s { + atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ +}; + +struct rtree_leaf_elm_s { +#ifdef RTREE_LEAF_COMPACT + /* + * Single pointer-width field containing all three leaf element fields. + * For example, on a 64-bit x64 system with 48 significant virtual + * memory address bits, the index, extent, and slab fields are packed as + * such: + * + * x: index + * e: extent + * b: slab + * + * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee000b + */ + atomic_p_t le_bits; +#else + atomic_p_t le_extent; /* (extent_t *) */ + atomic_u_t le_szind; /* (szind_t) */ + atomic_b_t le_slab; /* (bool) */ +#endif +}; + +typedef struct rtree_level_s rtree_level_t; +struct rtree_level_s { + /* Number of key bits distinguished by this level. */ + unsigned bits; + /* + * Cumulative number of key bits distinguished by traversing to + * corresponding tree level. + */ + unsigned cumbits; +}; + +typedef struct rtree_s rtree_t; +struct rtree_s { + malloc_mutex_t init_lock; + /* Number of elements based on rtree_levels[0].bits. */ +#if RTREE_HEIGHT > 1 + rtree_node_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; +#else + rtree_leaf_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; +#endif +}; + +/* + * Split the bits into one to three partitions depending on number of + * significant bits. It the number of bits does not divide evenly into the + * number of levels, place one remainder bit per level starting at the leaf + * level. + */ +static const rtree_level_t rtree_levels[] = { +#if RTREE_HEIGHT == 1 + {RTREE_NSB, RTREE_NHIB + RTREE_NSB} +#elif RTREE_HEIGHT == 2 + {RTREE_NSB/2, RTREE_NHIB + RTREE_NSB/2}, + {RTREE_NSB/2 + RTREE_NSB%2, RTREE_NHIB + RTREE_NSB} +#elif RTREE_HEIGHT == 3 + {RTREE_NSB/3, RTREE_NHIB + RTREE_NSB/3}, + {RTREE_NSB/3 + RTREE_NSB%3/2, + RTREE_NHIB + RTREE_NSB/3*2 + RTREE_NSB%3/2}, + {RTREE_NSB/3 + RTREE_NSB%3 - RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB} +#else +# error Unsupported rtree height +#endif +}; + +bool rtree_new(rtree_t *rtree, bool zeroed); + +typedef rtree_node_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); +extern rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc; + +typedef rtree_leaf_elm_t *(rtree_leaf_alloc_t)(tsdn_t *, rtree_t *, size_t); +extern rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc; + +typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_node_elm_t *); +extern rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc; + +typedef void (rtree_leaf_dalloc_t)(tsdn_t *, rtree_t *, rtree_leaf_elm_t *); +extern rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc; +#ifdef JEMALLOC_JET +void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); +#endif +rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); + +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_leafkey(uintptr_t key) { + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); + unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - + rtree_levels[RTREE_HEIGHT-1].bits); + unsigned maskbits = ptrbits - cumbits; + uintptr_t mask = ~((ZU(1) << maskbits) - 1); + return (key & mask); +} + +JEMALLOC_ALWAYS_INLINE size_t +rtree_cache_direct_map(uintptr_t key) { + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); + unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - + rtree_levels[RTREE_HEIGHT-1].bits); + unsigned maskbits = ptrbits - cumbits; + return (size_t)((key >> maskbits) & (RTREE_CTX_NCACHE - 1)); +} + +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_subkey(uintptr_t key, unsigned level) { + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); + unsigned cumbits = rtree_levels[level].cumbits; + unsigned shiftbits = ptrbits - cumbits; + unsigned maskbits = rtree_levels[level].bits; + uintptr_t mask = (ZU(1) << maskbits) - 1; + return ((key >> shiftbits) & mask); +} + +/* + * Atomic getters. + * + * dependent: Reading a value on behalf of a pointer to a valid allocation + * is guaranteed to be a clean read even without synchronization, + * because the rtree update became visible in memory before the + * pointer came into existence. + * !dependent: An arbitrary read, e.g. on behalf of ivsalloc(), may not be + * dependent on a previous rtree write, which means a stale read + * could result if synchronization were omitted here. + */ +# ifdef RTREE_LEAF_COMPACT +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool dependent) { + return (uintptr_t)atomic_load_p(&elm->le_bits, dependent + ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); +} + +JEMALLOC_ALWAYS_INLINE extent_t * +rtree_leaf_elm_bits_extent_get(uintptr_t bits) { + /* Restore sign-extended high bits, mask slab bit. */ + return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> + RTREE_NHIB) & ~((uintptr_t)0x1)); +} + +JEMALLOC_ALWAYS_INLINE szind_t +rtree_leaf_elm_bits_szind_get(uintptr_t bits) { + return (szind_t)(bits >> LG_VADDR); +} + +JEMALLOC_ALWAYS_INLINE bool +rtree_leaf_elm_bits_slab_get(uintptr_t bits) { + return (bool)(bits & (uintptr_t)0x1); +} + +# endif + +JEMALLOC_ALWAYS_INLINE extent_t * +rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool dependent) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + return rtree_leaf_elm_bits_extent_get(bits); +#else + extent_t *extent = (extent_t *)atomic_load_p(&elm->le_extent, dependent + ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); + return extent; +#endif +} + +JEMALLOC_ALWAYS_INLINE szind_t +rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool dependent) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + return rtree_leaf_elm_bits_szind_get(bits); +#else + return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED + : ATOMIC_ACQUIRE); +#endif +} + +JEMALLOC_ALWAYS_INLINE bool +rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool dependent) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + return rtree_leaf_elm_bits_slab_get(bits); +#else + return atomic_load_b(&elm->le_slab, dependent ? ATOMIC_RELAXED : + ATOMIC_ACQUIRE); +#endif +} + +static inline void +rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + extent_t *extent) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); + uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << + LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) + | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else + atomic_store_p(&elm->le_extent, extent, ATOMIC_RELEASE); +#endif +} + +static inline void +rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + szind_t szind) { + assert(szind <= NSIZES); + +#ifdef RTREE_LEAF_COMPACT + uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, + true); + uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | + ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & + (((uintptr_t)0x1 << LG_VADDR) - 1)) | + ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else + atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE); +#endif +} + +static inline void +rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool slab) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, + true); + uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << + LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & + (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab); + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else + atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE); +#endif +} + +static inline void +rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + extent_t *extent, szind_t szind, bool slab) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | + ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) | + ((uintptr_t)slab); + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else + rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); + rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); + /* + * Write extent last, since the element is atomically considered valid + * as soon as the extent field is non-NULL. + */ + rtree_leaf_elm_extent_write(tsdn, rtree, elm, extent); +#endif +} + +static inline void +rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, szind_t szind, bool slab) { + assert(!slab || szind < NBINS); + + /* + * The caller implicitly assures that it is the only writer to the szind + * and slab fields, and that the extent field cannot currently change. + */ + rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); + rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); +} + +JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * +rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, bool init_missing) { + assert(key != 0); + assert(!dependent || !init_missing); + + size_t slot = rtree_cache_direct_map(key); + uintptr_t leafkey = rtree_leafkey(key); + assert(leafkey != RTREE_LEAFKEY_INVALID); + + /* Fast path: L1 direct mapped cache. */ + if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { + rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; + assert(leaf != NULL); + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); + return &leaf[subkey]; + } + /* + * Search the L2 LRU cache. On hit, swap the matching element into the + * slot in L1 cache, and move the position in L2 up by 1. + */ +#define RTREE_CACHE_CHECK_L2(i) do { \ + if (likely(rtree_ctx->l2_cache[i].leafkey == leafkey)) { \ + rtree_leaf_elm_t *leaf = rtree_ctx->l2_cache[i].leaf; \ + assert(leaf != NULL); \ + if (i > 0) { \ + /* Bubble up by one. */ \ + rtree_ctx->l2_cache[i].leafkey = \ + rtree_ctx->l2_cache[i - 1].leafkey; \ + rtree_ctx->l2_cache[i].leaf = \ + rtree_ctx->l2_cache[i - 1].leaf; \ + rtree_ctx->l2_cache[i - 1].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[i - 1].leaf = \ + rtree_ctx->cache[slot].leaf; \ + } else { \ + rtree_ctx->l2_cache[0].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[0].leaf = \ + rtree_ctx->cache[slot].leaf; \ + } \ + rtree_ctx->cache[slot].leafkey = leafkey; \ + rtree_ctx->cache[slot].leaf = leaf; \ + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); \ + return &leaf[subkey]; \ + } \ +} while (0) + /* Check the first cache entry. */ + RTREE_CACHE_CHECK_L2(0); + /* Search the remaining cache elements. */ + for (unsigned i = 1; i < RTREE_CTX_NCACHE_L2; i++) { + RTREE_CACHE_CHECK_L2(i); + } +#undef RTREE_CACHE_CHECK_L2 + + return rtree_leaf_elm_lookup_hard(tsdn, rtree, rtree_ctx, key, + dependent, init_missing); +} + +static inline bool +rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, + extent_t *extent, szind_t szind, bool slab) { + /* Use rtree_clear() to set the extent to NULL. */ + assert(extent != NULL); + + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, false, true); + if (elm == NULL) { + return true; + } + + assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) == NULL); + rtree_leaf_elm_write(tsdn, rtree, elm, extent, szind, slab); + + return false; +} + +JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * +rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, + bool dependent) { + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, dependent, false); + if (!dependent && elm == NULL) { + return NULL; + } + assert(elm != NULL); + return elm; +} + +JEMALLOC_ALWAYS_INLINE extent_t * +rtree_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return NULL; + } + return rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent); +} + +JEMALLOC_ALWAYS_INLINE szind_t +rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return NSIZES; + } + return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); +} + +/* + * rtree_slab_read() is intentionally omitted because slab is always read in + * conjunction with szind, which makes rtree_szind_slab_read() a better choice. + */ + +JEMALLOC_ALWAYS_INLINE bool +rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, extent_t **r_extent, szind_t *r_szind) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return true; + } + *r_extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent); + *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return true; + } + *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); + *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); + return false; +} + +static inline void +rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, szind_t szind, bool slab) { + assert(!slab || szind < NBINS); + + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); + rtree_leaf_elm_szind_slab_update(tsdn, rtree, elm, szind, slab); +} + +static inline void +rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); + assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) != + NULL); + rtree_leaf_elm_write(tsdn, rtree, elm, NULL, NSIZES, false); +} + +#endif /* JEMALLOC_INTERNAL_RTREE_H */ diff --git a/include/jemalloc/internal/rtree_ctx.h b/include/jemalloc/internal/rtree_ctx.h deleted file mode 100644 index fe2c8bd..0000000 --- a/include/jemalloc/internal/rtree_ctx.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_RTREE_CTX_H -#define JEMALLOC_INTERNAL_RTREE_CTX_H - -#include "jemalloc/internal/rtree_types.h" - -typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; -struct rtree_ctx_cache_elm_s { - uintptr_t leafkey; - rtree_leaf_elm_t *leaf; -}; - -typedef struct rtree_ctx_s rtree_ctx_t; -struct rtree_ctx_s { - /* Direct mapped cache. */ - rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; - /* L2 LRU cache. */ - rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; -}; - -void rtree_ctx_data_init(rtree_ctx_t *ctx); - -#endif /* JEMALLOC_INTERNAL_RTREE_CTX_H */ diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h deleted file mode 100644 index d7d8165..0000000 --- a/include/jemalloc/internal/rtree_externs.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_RTREE_EXTERNS_H -#define JEMALLOC_INTERNAL_RTREE_EXTERNS_H - -/* - * Split the bits into one to three partitions depending on number of - * significant bits. It the number of bits does not divide evenly into the - * number of levels, place one remainder bit per level starting at the leaf - * level. - */ -static const rtree_level_t rtree_levels[] = { -#if RTREE_HEIGHT == 1 - {RTREE_NSB, RTREE_NHIB + RTREE_NSB} -#elif RTREE_HEIGHT == 2 - {RTREE_NSB/2, RTREE_NHIB + RTREE_NSB/2}, - {RTREE_NSB/2 + RTREE_NSB%2, RTREE_NHIB + RTREE_NSB} -#elif RTREE_HEIGHT == 3 - {RTREE_NSB/3, RTREE_NHIB + RTREE_NSB/3}, - {RTREE_NSB/3 + RTREE_NSB%3/2, - RTREE_NHIB + RTREE_NSB/3*2 + RTREE_NSB%3/2}, - {RTREE_NSB/3 + RTREE_NSB%3 - RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB} -#else -# error Unsupported rtree height -#endif -}; - -bool rtree_new(rtree_t *rtree, bool zeroed); - -typedef rtree_node_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); -extern rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc; - -typedef rtree_leaf_elm_t *(rtree_leaf_alloc_t)(tsdn_t *, rtree_t *, size_t); -extern rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc; - -typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_node_elm_t *); -extern rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc; - -typedef void (rtree_leaf_dalloc_t)(tsdn_t *, rtree_t *, rtree_leaf_elm_t *); -extern rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc; -#ifdef JEMALLOC_JET -void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); -#endif -rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); - -#endif /* JEMALLOC_INTERNAL_RTREE_EXTERNS_H */ diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h deleted file mode 100644 index 335a89c..0000000 --- a/include/jemalloc/internal/rtree_inlines.h +++ /dev/null @@ -1,350 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_RTREE_INLINES_H -#define JEMALLOC_INTERNAL_RTREE_INLINES_H - -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/spin.h" - -JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_leafkey(uintptr_t key) { - unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); - unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - - rtree_levels[RTREE_HEIGHT-1].bits); - unsigned maskbits = ptrbits - cumbits; - uintptr_t mask = ~((ZU(1) << maskbits) - 1); - return (key & mask); -} - -JEMALLOC_ALWAYS_INLINE size_t -rtree_cache_direct_map(uintptr_t key) { - unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); - unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - - rtree_levels[RTREE_HEIGHT-1].bits); - unsigned maskbits = ptrbits - cumbits; - return (size_t)((key >> maskbits) & (RTREE_CTX_NCACHE - 1)); -} - -JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_subkey(uintptr_t key, unsigned level) { - unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); - unsigned cumbits = rtree_levels[level].cumbits; - unsigned shiftbits = ptrbits - cumbits; - unsigned maskbits = rtree_levels[level].bits; - uintptr_t mask = (ZU(1) << maskbits) - 1; - return ((key >> shiftbits) & mask); -} - -/* - * Atomic getters. - * - * dependent: Reading a value on behalf of a pointer to a valid allocation - * is guaranteed to be a clean read even without synchronization, - * because the rtree update became visible in memory before the - * pointer came into existence. - * !dependent: An arbitrary read, e.g. on behalf of ivsalloc(), may not be - * dependent on a previous rtree write, which means a stale read - * could result if synchronization were omitted here. - */ -# ifdef RTREE_LEAF_COMPACT -JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { - return (uintptr_t)atomic_load_p(&elm->le_bits, dependent - ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); -} - -JEMALLOC_ALWAYS_INLINE extent_t * -rtree_leaf_elm_bits_extent_get(uintptr_t bits) { - /* Restore sign-extended high bits, mask slab bit. */ - return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> - RTREE_NHIB) & ~((uintptr_t)0x1)); -} - -JEMALLOC_ALWAYS_INLINE szind_t -rtree_leaf_elm_bits_szind_get(uintptr_t bits) { - return (szind_t)(bits >> LG_VADDR); -} - -JEMALLOC_ALWAYS_INLINE bool -rtree_leaf_elm_bits_slab_get(uintptr_t bits) { - return (bool)(bits & (uintptr_t)0x1); -} - -# endif - -JEMALLOC_ALWAYS_INLINE extent_t * -rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { -#ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - return rtree_leaf_elm_bits_extent_get(bits); -#else - extent_t *extent = (extent_t *)atomic_load_p(&elm->le_extent, dependent - ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); - return extent; -#endif -} - -JEMALLOC_ALWAYS_INLINE szind_t -rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { -#ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - return rtree_leaf_elm_bits_szind_get(bits); -#else - return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED - : ATOMIC_ACQUIRE); -#endif -} - -JEMALLOC_ALWAYS_INLINE bool -rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { -#ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - return rtree_leaf_elm_bits_slab_get(bits); -#else - return atomic_load_b(&elm->le_slab, dependent ? ATOMIC_RELAXED : - ATOMIC_ACQUIRE); -#endif -} - -static inline void -rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - extent_t *extent) { -#ifdef RTREE_LEAF_COMPACT - uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); - uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << - LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) - | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); - atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); -#else - atomic_store_p(&elm->le_extent, extent, ATOMIC_RELEASE); -#endif -} - -static inline void -rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - szind_t szind) { - assert(szind <= NSIZES); - -#ifdef RTREE_LEAF_COMPACT - uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, - true); - uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | - ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & - (((uintptr_t)0x1 << LG_VADDR) - 1)) | - ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); - atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); -#else - atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE); -#endif -} - -static inline void -rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool slab) { -#ifdef RTREE_LEAF_COMPACT - uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, - true); - uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << - LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & - (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab); - atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); -#else - atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE); -#endif -} - -static inline void -rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - extent_t *extent, szind_t szind, bool slab) { -#ifdef RTREE_LEAF_COMPACT - uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | - ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) | - ((uintptr_t)slab); - atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); -#else - rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); - rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); - /* - * Write extent last, since the element is atomically considered valid - * as soon as the extent field is non-NULL. - */ - rtree_leaf_elm_extent_write(tsdn, rtree, elm, extent); -#endif -} - -static inline void -rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, szind_t szind, bool slab) { - assert(!slab || szind < NBINS); - - /* - * The caller implicitly assures that it is the only writer to the szind - * and slab fields, and that the extent field cannot currently change. - */ - rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); - rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); -} - -JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * -rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, bool init_missing) { - assert(key != 0); - assert(!dependent || !init_missing); - - size_t slot = rtree_cache_direct_map(key); - uintptr_t leafkey = rtree_leafkey(key); - assert(leafkey != RTREE_LEAFKEY_INVALID); - - /* Fast path: L1 direct mapped cache. */ - if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { - rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; - assert(leaf != NULL); - uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); - return &leaf[subkey]; - } - /* - * Search the L2 LRU cache. On hit, swap the matching element into the - * slot in L1 cache, and move the position in L2 up by 1. - */ -#define RTREE_CACHE_CHECK_L2(i) do { \ - if (likely(rtree_ctx->l2_cache[i].leafkey == leafkey)) { \ - rtree_leaf_elm_t *leaf = rtree_ctx->l2_cache[i].leaf; \ - assert(leaf != NULL); \ - if (i > 0) { \ - /* Bubble up by one. */ \ - rtree_ctx->l2_cache[i].leafkey = \ - rtree_ctx->l2_cache[i - 1].leafkey; \ - rtree_ctx->l2_cache[i].leaf = \ - rtree_ctx->l2_cache[i - 1].leaf; \ - rtree_ctx->l2_cache[i - 1].leafkey = \ - rtree_ctx->cache[slot].leafkey; \ - rtree_ctx->l2_cache[i - 1].leaf = \ - rtree_ctx->cache[slot].leaf; \ - } else { \ - rtree_ctx->l2_cache[0].leafkey = \ - rtree_ctx->cache[slot].leafkey; \ - rtree_ctx->l2_cache[0].leaf = \ - rtree_ctx->cache[slot].leaf; \ - } \ - rtree_ctx->cache[slot].leafkey = leafkey; \ - rtree_ctx->cache[slot].leaf = leaf; \ - uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); \ - return &leaf[subkey]; \ - } \ -} while (0) - /* Check the first cache entry. */ - RTREE_CACHE_CHECK_L2(0); - /* Search the remaining cache elements. */ - for (unsigned i = 1; i < RTREE_CTX_NCACHE_L2; i++) { - RTREE_CACHE_CHECK_L2(i); - } -#undef RTREE_CACHE_CHECK_L2 - - return rtree_leaf_elm_lookup_hard(tsdn, rtree, rtree_ctx, key, - dependent, init_missing); -} - -static inline bool -rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, - extent_t *extent, szind_t szind, bool slab) { - /* Use rtree_clear() to set the extent to NULL. */ - assert(extent != NULL); - - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, - key, false, true); - if (elm == NULL) { - return true; - } - - assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) == NULL); - rtree_leaf_elm_write(tsdn, rtree, elm, extent, szind, slab); - - return false; -} - -JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * -rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, - bool dependent) { - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, - key, dependent, false); - if (!dependent && elm == NULL) { - return NULL; - } - assert(elm != NULL); - return elm; -} - -JEMALLOC_ALWAYS_INLINE extent_t * -rtree_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, - dependent); - if (!dependent && elm == NULL) { - return NULL; - } - return rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent); -} - -JEMALLOC_ALWAYS_INLINE szind_t -rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, - dependent); - if (!dependent && elm == NULL) { - return NSIZES; - } - return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); -} - -/* - * rtree_slab_read() is intentionally omitted because slab is always read in - * conjunction with szind, which makes rtree_szind_slab_read() a better choice. - */ - -JEMALLOC_ALWAYS_INLINE bool -rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, extent_t **r_extent, szind_t *r_szind) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, - dependent); - if (!dependent && elm == NULL) { - return true; - } - *r_extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent); - *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); - return false; -} - -JEMALLOC_ALWAYS_INLINE bool -rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, - dependent); - if (!dependent && elm == NULL) { - return true; - } - *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); - *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); - return false; -} - -static inline void -rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, szind_t szind, bool slab) { - assert(!slab || szind < NBINS); - - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); - rtree_leaf_elm_szind_slab_update(tsdn, rtree, elm, szind, slab); -} - -static inline void -rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); - assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) != - NULL); - rtree_leaf_elm_write(tsdn, rtree, elm, NULL, NSIZES, false); -} - -#endif /* JEMALLOC_INTERNAL_RTREE_INLINES_H */ diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h deleted file mode 100644 index a02a1f6..0000000 --- a/include/jemalloc/internal/rtree_structs.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_RTREE_STRUCTS_H -#define JEMALLOC_INTERNAL_RTREE_STRUCTS_H - -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/mutex.h" - -struct rtree_node_elm_s { - atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ -}; - -struct rtree_leaf_elm_s { -#ifdef RTREE_LEAF_COMPACT - /* - * Single pointer-width field containing all three leaf element fields. - * For example, on a 64-bit x64 system with 48 significant virtual - * memory address bits, the index, extent, and slab fields are packed as - * such: - * - * x: index - * e: extent - * b: slab - * - * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee000b - */ - atomic_p_t le_bits; -#else - atomic_p_t le_extent; /* (extent_t *) */ - atomic_u_t le_szind; /* (szind_t) */ - atomic_b_t le_slab; /* (bool) */ -#endif -}; - -struct rtree_level_s { - /* Number of key bits distinguished by this level. */ - unsigned bits; - /* - * Cumulative number of key bits distinguished by traversing to - * corresponding tree level. - */ - unsigned cumbits; -}; - -struct rtree_s { - malloc_mutex_t init_lock; - /* Number of elements based on rtree_levels[0].bits. */ -#if RTREE_HEIGHT > 1 - rtree_node_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; -#else - rtree_leaf_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; -#endif -}; - -#endif /* JEMALLOC_INTERNAL_RTREE_STRUCTS_H */ diff --git a/include/jemalloc/internal/rtree_tsd.h b/include/jemalloc/internal/rtree_tsd.h new file mode 100644 index 0000000..3cdc862 --- /dev/null +++ b/include/jemalloc/internal/rtree_tsd.h @@ -0,0 +1,50 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_CTX_H +#define JEMALLOC_INTERNAL_RTREE_CTX_H + +/* + * Number of leafkey/leaf pairs to cache in L1 and L2 level respectively. Each + * entry supports an entire leaf, so the cache hit rate is typically high even + * with a small number of entries. In rare cases extent activity will straddle + * the boundary between two leaf nodes. Furthermore, an arena may use a + * combination of dss and mmap. Note that as memory usage grows past the amount + * that this cache can directly cover, the cache will become less effective if + * locality of reference is low, but the consequence is merely cache misses + * while traversing the tree nodes. + * + * The L1 direct mapped cache offers consistent and low cost on cache hit. + * However collision could affect hit rate negatively. This is resolved by + * combining with a L2 LRU cache, which requires linear search and re-ordering + * on access but suffers no collision. Note that, the cache will itself suffer + * cache misses if made overly large, plus the cost of linear search in the LRU + * cache. + */ +#define RTREE_CTX_LG_NCACHE 4 +#define RTREE_CTX_NCACHE (1 << RTREE_CTX_LG_NCACHE) +#define RTREE_CTX_NCACHE_L2 8 + +/* + * Zero initializer required for tsd initialization only. Proper initialization + * done via rtree_ctx_data_init(). + */ +#define RTREE_CTX_ZERO_INITIALIZER {{{0}}} + + +typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; + +typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; +struct rtree_ctx_cache_elm_s { + uintptr_t leafkey; + rtree_leaf_elm_t *leaf; +}; + +typedef struct rtree_ctx_s rtree_ctx_t; +struct rtree_ctx_s { + /* Direct mapped cache. */ + rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; + /* L2 LRU cache. */ + rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; +}; + +void rtree_ctx_data_init(rtree_ctx_t *ctx); + +#endif /* JEMALLOC_INTERNAL_RTREE_CTX_H */ diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h deleted file mode 100644 index fd0f140..0000000 --- a/include/jemalloc/internal/rtree_types.h +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_RTREE_TYPES_H -#define JEMALLOC_INTERNAL_RTREE_TYPES_H - -#include "jemalloc/internal/size_classes.h" - -/* - * This radix tree implementation is tailored to the singular purpose of - * associating metadata with extents that are currently owned by jemalloc. - * - ******************************************************************************* - */ - -typedef struct rtree_node_elm_s rtree_node_elm_t; -typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; -typedef struct rtree_level_s rtree_level_t; -typedef struct rtree_s rtree_t; - -/* Number of high insignificant bits. */ -#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR+3)) - LG_VADDR) -/* Number of low insigificant bits. */ -#define RTREE_NLIB LG_PAGE -/* Number of significant bits. */ -#define RTREE_NSB (LG_VADDR - RTREE_NLIB) -/* Number of levels in radix tree. */ -#if RTREE_NSB <= 10 -# define RTREE_HEIGHT 1 -#elif RTREE_NSB <= 36 -# define RTREE_HEIGHT 2 -#elif RTREE_NSB <= 52 -# define RTREE_HEIGHT 3 -#else -# error Unsupported number of significant virtual address bits -#endif -/* Use compact leaf representation if virtual address encoding allows. */ -#if RTREE_NHIB >= LG_CEIL_NSIZES -# define RTREE_LEAF_COMPACT -#endif - -/* Needed for initialization only. */ -#define RTREE_LEAFKEY_INVALID ((uintptr_t)1) - -/* - * Number of leafkey/leaf pairs to cache in L1 and L2 level respectively. Each - * entry supports an entire leaf, so the cache hit rate is typically high even - * with a small number of entries. In rare cases extent activity will straddle - * the boundary between two leaf nodes. Furthermore, an arena may use a - * combination of dss and mmap. Note that as memory usage grows past the amount - * that this cache can directly cover, the cache will become less effective if - * locality of reference is low, but the consequence is merely cache misses - * while traversing the tree nodes. - * - * The L1 direct mapped cache offers consistent and low cost on cache hit. - * However collision could affect hit rate negatively. This is resolved by - * combining with a L2 LRU cache, which requires linear search and re-ordering - * on access but suffers no collision. Note that, the cache will itself suffer - * cache misses if made overly large, plus the cost of linear search in the LRU - * cache. - */ -#define RTREE_CTX_LG_NCACHE 4 -#define RTREE_CTX_NCACHE (1 << RTREE_CTX_LG_NCACHE) -#define RTREE_CTX_NCACHE_L2 8 - -/* - * Zero initializer required for tsd initialization only. Proper initialization - * done via rtree_ctx_data_init(). - */ -#define RTREE_CTX_ZERO_INITIALIZER {{{0}}} - -#endif /* JEMALLOC_INTERNAL_RTREE_TYPES_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index c192a6c..f304e1d 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -6,7 +6,7 @@ #include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/prof_types.h" #include "jemalloc/internal/ql.h" -#include "jemalloc/internal/rtree_ctx.h" +#include "jemalloc/internal/rtree_tsd.h" #include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/util.h" diff --git a/src/arena.c b/src/arena.c index 9b3ea23..3d0725f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/util.h" diff --git a/src/extent.c b/src/extent.c index 44e9878..2264a0c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ph.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/mutex.h" /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index ed22a25..00b645f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -8,6 +8,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/spin.h" #include "jemalloc/internal/ticker.h" diff --git a/src/large.c b/src/large.c index 55ee352..27c9bc6 100644 --- a/src/large.c +++ b/src/large.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/util.h" /******************************************************************************/ diff --git a/src/tsd.c b/src/tsd.c index 525432b..29a5677 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree.h" /******************************************************************************/ /* Data. */ diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index d169832..678ae57 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -2,6 +2,8 @@ #include "test/jemalloc_test.h" #endif +#include "jemalloc/internal/rtree.h" + #include "test/extent_hooks.h" static unsigned diff --git a/test/unit/rtree.c b/test/unit/rtree.c index b854afd..752dde9 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/rtree.h" + rtree_node_alloc_t *rtree_node_alloc_orig; rtree_node_dalloc_t *rtree_node_dalloc_orig; rtree_leaf_alloc_t *rtree_leaf_alloc_orig; diff --git a/test/unit/spin.c b/test/unit/spin.c index bd368b3..b965f74 100644 --- a/test/unit/spin.c +++ b/test/unit/spin.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/spin.h" + TEST_BEGIN(test_spin) { spin_t spinner = SPIN_INITIALIZER; -- cgit v0.12