From 77cccac8cde9fb1f1555331814c4e6440c16de43 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Jan 2017 18:06:31 -0800 Subject: Break up headers into constituent parts This is part of a broader change to make header files better represent the dependencies between one another (see https://github.com/jemalloc/jemalloc/issues/533). It breaks up component headers into smaller parts that can be made to have a simpler dependency graph. For the autogenerated headers (smoothstep.h and size_classes.h), no splitting was necessary, so I didn't add support to emit multiple headers. --- include/jemalloc/internal/arena.h | 638 ---------------- include/jemalloc/internal/arena_externs.h | 92 +++ include/jemalloc/internal/arena_inlines_a.h | 91 +++ include/jemalloc/internal/arena_inlines_b.h | 209 ++++++ include/jemalloc/internal/arena_structs_a.h | 15 + include/jemalloc/internal/arena_structs_b.h | 214 ++++++ include/jemalloc/internal/arena_types.h | 22 + include/jemalloc/internal/atomic.h | 651 ----------------- include/jemalloc/internal/atomic_externs.h | 12 + include/jemalloc/internal/atomic_inlines.h | 629 ++++++++++++++++ include/jemalloc/internal/base.h | 87 --- include/jemalloc/internal/base_externs.h | 18 + include/jemalloc/internal/base_inlines.h | 17 + include/jemalloc/internal/base_structs.h | 44 ++ include/jemalloc/internal/base_types.h | 7 + include/jemalloc/internal/bitmap.h | 322 -------- include/jemalloc/internal/bitmap_externs.h | 8 + include/jemalloc/internal/bitmap_inlines.h | 152 ++++ include/jemalloc/internal/bitmap_structs.h | 28 + include/jemalloc/internal/bitmap_types.h | 133 ++++ include/jemalloc/internal/ckh.h | 86 --- include/jemalloc/internal/ckh_externs.h | 18 + include/jemalloc/internal/ckh_structs.h | 41 ++ include/jemalloc/internal/ckh_types.h | 22 + include/jemalloc/internal/ctl.h | 127 ---- include/jemalloc/internal/ctl_externs.h | 43 ++ include/jemalloc/internal/ctl_structs.h | 68 ++ include/jemalloc/internal/ctl_types.h | 10 + include/jemalloc/internal/extent.h | 499 ------------- include/jemalloc/internal/extent_dss.h | 39 - include/jemalloc/internal/extent_dss_externs.h | 14 + include/jemalloc/internal/extent_dss_structs.h | 6 + include/jemalloc/internal/extent_dss_types.h | 14 + include/jemalloc/internal/extent_externs.h | 60 ++ include/jemalloc/internal/extent_inlines.h | 343 +++++++++ include/jemalloc/internal/extent_mmap.h | 21 - include/jemalloc/internal/extent_mmap_externs.h | 8 + include/jemalloc/internal/extent_structs.h | 84 +++ include/jemalloc/internal/extent_types.h | 8 + include/jemalloc/internal/hash.h | 357 --------- include/jemalloc/internal/hash_inlines.h | 345 +++++++++ include/jemalloc/internal/jemalloc_internal.h.in | 237 +++--- .../jemalloc/internal/jemalloc_internal_macros.h | 5 + include/jemalloc/internal/large.h | 40 - include/jemalloc/internal/large_externs.h | 27 + include/jemalloc/internal/mutex.h | 150 ---- include/jemalloc/internal/mutex_externs.h | 18 + include/jemalloc/internal/mutex_inlines.h | 74 ++ include/jemalloc/internal/mutex_structs.h | 24 + include/jemalloc/internal/mutex_types.h | 33 + include/jemalloc/internal/nstime.h | 48 -- include/jemalloc/internal/nstime_externs.h | 26 + include/jemalloc/internal/nstime_structs.h | 8 + include/jemalloc/internal/nstime_types.h | 9 + include/jemalloc/internal/pages.h | 85 --- include/jemalloc/internal/pages_externs.h | 31 + include/jemalloc/internal/pages_types.h | 44 ++ include/jemalloc/internal/prng.h | 207 ------ include/jemalloc/internal/prng_inlines.h | 169 +++++ 
include/jemalloc/internal/prng_types.h | 29 + include/jemalloc/internal/prof.h | 568 --------------- include/jemalloc/internal/prof_externs.h | 83 +++ include/jemalloc/internal/prof_inlines.h | 242 ++++++ include/jemalloc/internal/prof_structs.h | 187 +++++ include/jemalloc/internal/prof_types.h | 55 ++ include/jemalloc/internal/ql.h | 5 + include/jemalloc/internal/qr.h | 5 + include/jemalloc/internal/rtree.h | 608 --------------- include/jemalloc/internal/rtree_externs.h | 23 + include/jemalloc/internal/rtree_inlines.h | 442 +++++++++++ include/jemalloc/internal/rtree_structs.h | 86 +++ include/jemalloc/internal/rtree_types.h | 58 ++ include/jemalloc/internal/size_classes.sh | 23 +- include/jemalloc/internal/smoothstep.h | 22 +- include/jemalloc/internal/smoothstep.sh | 22 +- include/jemalloc/internal/spin.h | 51 -- include/jemalloc/internal/spin_inlines.h | 31 + include/jemalloc/internal/spin_structs.h | 8 + include/jemalloc/internal/spin_types.h | 6 + include/jemalloc/internal/stats.h | 130 ---- include/jemalloc/internal/stats_externs.h | 9 + include/jemalloc/internal/stats_structs.h | 107 +++ include/jemalloc/internal/stats_types.h | 9 + include/jemalloc/internal/tcache.h | 459 ------------ include/jemalloc/internal/tcache_externs.h | 47 ++ include/jemalloc/internal/tcache_inlines.h | 306 ++++++++ include/jemalloc/internal/tcache_structs.h | 55 ++ include/jemalloc/internal/tcache_types.h | 50 ++ include/jemalloc/internal/ticker.h | 75 -- include/jemalloc/internal/ticker_inlines.h | 55 ++ include/jemalloc/internal/ticker_structs.h | 9 + include/jemalloc/internal/ticker_types.h | 6 + include/jemalloc/internal/tsd.h | 811 --------------------- include/jemalloc/internal/tsd_externs.h | 18 + include/jemalloc/internal/tsd_inlines.h | 140 ++++ include/jemalloc/internal/tsd_structs.h | 73 ++ include/jemalloc/internal/tsd_types.h | 579 +++++++++++++++ include/jemalloc/internal/util.h | 346 --------- include/jemalloc/internal/util_externs.h | 23 + include/jemalloc/internal/util_inlines.h | 224 ++++++ include/jemalloc/internal/util_types.h | 94 +++ include/jemalloc/internal/witness.h | 275 ------- include/jemalloc/internal/witness_externs.h | 37 + include/jemalloc/internal/witness_inlines.h | 163 +++++ include/jemalloc/internal/witness_structs.h | 28 + include/jemalloc/internal/witness_types.h | 46 ++ test/include/test/jemalloc_test.h.in | 16 +- 107 files changed, 6711 insertions(+), 6870 deletions(-) delete mode 100644 include/jemalloc/internal/arena.h create mode 100644 include/jemalloc/internal/arena_externs.h create mode 100644 include/jemalloc/internal/arena_inlines_a.h create mode 100644 include/jemalloc/internal/arena_inlines_b.h create mode 100644 include/jemalloc/internal/arena_structs_a.h create mode 100644 include/jemalloc/internal/arena_structs_b.h create mode 100644 include/jemalloc/internal/arena_types.h delete mode 100644 include/jemalloc/internal/atomic.h create mode 100644 include/jemalloc/internal/atomic_externs.h create mode 100644 include/jemalloc/internal/atomic_inlines.h delete mode 100644 include/jemalloc/internal/base.h create mode 100644 include/jemalloc/internal/base_externs.h create mode 100644 include/jemalloc/internal/base_inlines.h create mode 100644 include/jemalloc/internal/base_structs.h create mode 100644 include/jemalloc/internal/base_types.h delete mode 100644 include/jemalloc/internal/bitmap.h create mode 100644 include/jemalloc/internal/bitmap_externs.h create mode 100644 include/jemalloc/internal/bitmap_inlines.h create mode 100644 
include/jemalloc/internal/bitmap_structs.h create mode 100644 include/jemalloc/internal/bitmap_types.h delete mode 100644 include/jemalloc/internal/ckh.h create mode 100644 include/jemalloc/internal/ckh_externs.h create mode 100644 include/jemalloc/internal/ckh_structs.h create mode 100644 include/jemalloc/internal/ckh_types.h delete mode 100644 include/jemalloc/internal/ctl.h create mode 100644 include/jemalloc/internal/ctl_externs.h create mode 100644 include/jemalloc/internal/ctl_structs.h create mode 100644 include/jemalloc/internal/ctl_types.h delete mode 100644 include/jemalloc/internal/extent.h delete mode 100644 include/jemalloc/internal/extent_dss.h create mode 100644 include/jemalloc/internal/extent_dss_externs.h create mode 100644 include/jemalloc/internal/extent_dss_structs.h create mode 100644 include/jemalloc/internal/extent_dss_types.h create mode 100644 include/jemalloc/internal/extent_externs.h create mode 100644 include/jemalloc/internal/extent_inlines.h delete mode 100644 include/jemalloc/internal/extent_mmap.h create mode 100644 include/jemalloc/internal/extent_mmap_externs.h create mode 100644 include/jemalloc/internal/extent_structs.h create mode 100644 include/jemalloc/internal/extent_types.h delete mode 100644 include/jemalloc/internal/hash.h create mode 100644 include/jemalloc/internal/hash_inlines.h delete mode 100644 include/jemalloc/internal/large.h create mode 100644 include/jemalloc/internal/large_externs.h delete mode 100644 include/jemalloc/internal/mutex.h create mode 100644 include/jemalloc/internal/mutex_externs.h create mode 100644 include/jemalloc/internal/mutex_inlines.h create mode 100644 include/jemalloc/internal/mutex_structs.h create mode 100644 include/jemalloc/internal/mutex_types.h delete mode 100644 include/jemalloc/internal/nstime.h create mode 100644 include/jemalloc/internal/nstime_externs.h create mode 100644 include/jemalloc/internal/nstime_structs.h create mode 100644 include/jemalloc/internal/nstime_types.h delete mode 100644 include/jemalloc/internal/pages.h create mode 100644 include/jemalloc/internal/pages_externs.h create mode 100644 include/jemalloc/internal/pages_types.h delete mode 100644 include/jemalloc/internal/prng.h create mode 100644 include/jemalloc/internal/prng_inlines.h create mode 100644 include/jemalloc/internal/prng_types.h delete mode 100644 include/jemalloc/internal/prof.h create mode 100644 include/jemalloc/internal/prof_externs.h create mode 100644 include/jemalloc/internal/prof_inlines.h create mode 100644 include/jemalloc/internal/prof_structs.h create mode 100644 include/jemalloc/internal/prof_types.h delete mode 100644 include/jemalloc/internal/rtree.h create mode 100644 include/jemalloc/internal/rtree_externs.h create mode 100644 include/jemalloc/internal/rtree_inlines.h create mode 100644 include/jemalloc/internal/rtree_structs.h create mode 100644 include/jemalloc/internal/rtree_types.h delete mode 100644 include/jemalloc/internal/spin.h create mode 100644 include/jemalloc/internal/spin_inlines.h create mode 100644 include/jemalloc/internal/spin_structs.h create mode 100644 include/jemalloc/internal/spin_types.h delete mode 100644 include/jemalloc/internal/stats.h create mode 100644 include/jemalloc/internal/stats_externs.h create mode 100644 include/jemalloc/internal/stats_structs.h create mode 100644 include/jemalloc/internal/stats_types.h delete mode 100644 include/jemalloc/internal/tcache.h create mode 100644 include/jemalloc/internal/tcache_externs.h create mode 100644 
include/jemalloc/internal/tcache_inlines.h create mode 100644 include/jemalloc/internal/tcache_structs.h create mode 100644 include/jemalloc/internal/tcache_types.h delete mode 100644 include/jemalloc/internal/ticker.h create mode 100644 include/jemalloc/internal/ticker_inlines.h create mode 100644 include/jemalloc/internal/ticker_structs.h create mode 100644 include/jemalloc/internal/ticker_types.h delete mode 100644 include/jemalloc/internal/tsd.h create mode 100644 include/jemalloc/internal/tsd_externs.h create mode 100644 include/jemalloc/internal/tsd_inlines.h create mode 100644 include/jemalloc/internal/tsd_structs.h create mode 100644 include/jemalloc/internal/tsd_types.h delete mode 100644 include/jemalloc/internal/util.h create mode 100644 include/jemalloc/internal/util_externs.h create mode 100644 include/jemalloc/internal/util_inlines.h create mode 100644 include/jemalloc/internal/util_types.h delete mode 100644 include/jemalloc/internal/witness.h create mode 100644 include/jemalloc/internal/witness_externs.h create mode 100644 include/jemalloc/internal/witness_inlines.h create mode 100644 include/jemalloc/internal/witness_structs.h create mode 100644 include/jemalloc/internal/witness_types.h diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h deleted file mode 100644 index 5e29550..0000000 --- a/include/jemalloc/internal/arena.h +++ /dev/null @@ -1,638 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) - -/* Maximum number of regions in one slab. */ -#define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) -#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) - -/* Default decay time in seconds. */ -#define DECAY_TIME_DEFAULT 10 -/* Number of event ticks between time checks. */ -#define DECAY_NTICKS_PER_UPDATE 1000 - -typedef struct arena_slab_data_s arena_slab_data_t; -typedef struct arena_bin_info_s arena_bin_info_t; -typedef struct arena_decay_s arena_decay_t; -typedef struct arena_bin_s arena_bin_t; -typedef struct arena_s arena_t; -typedef struct arena_tdata_s arena_tdata_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#ifdef JEMALLOC_ARENA_STRUCTS_A -struct arena_slab_data_s { - /* Index of bin this slab is associated with. */ - szind_t binind; - - /* Number of free regions in slab. */ - unsigned nfree; - - /* Per region allocated/deallocated bitmap. */ - bitmap_t bitmap[BITMAP_GROUPS_MAX]; -}; -#endif /* JEMALLOC_ARENA_STRUCTS_A */ - -#ifdef JEMALLOC_ARENA_STRUCTS_B -/* - * Read-only information associated with each element of arena_t's bins array - * is stored separately, partly to reduce memory usage (only one copy, rather - * than one per arena), but mainly to avoid false cacheline sharing. - * - * Each slab has the following layout: - * - * /--------------------\ - * | region 0 | - * |--------------------| - * | region 1 | - * |--------------------| - * | ... | - * | ... | - * | ... | - * |--------------------| - * | region nregs-1 | - * \--------------------/ - */ -struct arena_bin_info_s { - /* Size of regions in a slab for this bin's size class. */ - size_t reg_size; - - /* Total size of a slab for this bin's size class. */ - size_t slab_size; - - /* Total number of regions in a slab for this bin's size class. */ - uint32_t nregs; - - /* - * Metadata used to manipulate bitmaps for slabs associated with this - * bin. 
- */ - bitmap_info_t bitmap_info; -}; - -struct arena_decay_s { - /* - * Approximate time in seconds from the creation of a set of unused - * dirty pages until an equivalent set of unused dirty pages is purged - * and/or reused. - */ - ssize_t time; - /* time / SMOOTHSTEP_NSTEPS. */ - nstime_t interval; - /* - * Time at which the current decay interval logically started. We do - * not actually advance to a new epoch until sometime after it starts - * because of scheduling and computation delays, and it is even possible - * to completely skip epochs. In all cases, during epoch advancement we - * merge all relevant activity into the most recently recorded epoch. - */ - nstime_t epoch; - /* Deadline randomness generator. */ - uint64_t jitter_state; - /* - * Deadline for current epoch. This is the sum of interval and per - * epoch jitter which is a uniform random variable in [0..interval). - * Epochs always advance by precise multiples of interval, but we - * randomize the deadline to reduce the likelihood of arenas purging in - * lockstep. - */ - nstime_t deadline; - /* - * Number of dirty pages at beginning of current epoch. During epoch - * advancement we use the delta between arena->decay.ndirty and - * arena->ndirty to determine how many dirty pages, if any, were - * generated. - */ - size_t nunpurged; - /* - * Trailing log of how many unused dirty pages were generated during - * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last - * element is the most recent epoch. Corresponding epoch times are - * relative to epoch. - */ - size_t backlog[SMOOTHSTEP_NSTEPS]; -}; - -struct arena_bin_s { - /* All operations on arena_bin_t fields require lock ownership. */ - malloc_mutex_t lock; - - /* - * Current slab being used to service allocations of this bin's size - * class. slabcur is independent of slabs_{nonfull,full}; whenever - * slabcur is reassigned, the previous slab must be deallocated or - * inserted into slabs_{nonfull,full}. - */ - extent_t *slabcur; - - /* - * Heap of non-full slabs. This heap is used to assure that new - * allocations come from the non-full slab that is oldest/lowest in - * memory. - */ - extent_heap_t slabs_nonfull; - - /* Ring sentinel used to track full slabs. */ - extent_t slabs_full; - - /* Bin statistics. */ - malloc_bin_stats_t stats; -}; - -struct arena_s { - /* - * Number of threads currently assigned to this arena, synchronized via - * atomic operations. Each thread has two distinct assignments, one for - * application-serving allocation, and the other for internal metadata - * allocation. Internal metadata must not be allocated from arenas - * explicitly created via the arenas.create mallctl, because the - * arena..reset mallctl indiscriminately discards all allocations for - * the affected arena. - * - * 0: Application allocation. - * 1: Internal metadata allocation. - */ - unsigned nthreads[2]; - - /* - * There are three classes of arena operations from a locking - * perspective: - * 1) Thread assignment (modifies nthreads) is synchronized via atomics. - * 2) Bin-related operations are protected by bin locks. - * 3) Extent-related operations are protected by this mutex. - */ - malloc_mutex_t lock; - - arena_stats_t stats; - /* - * List of tcaches for extant threads associated with this arena. - * Stats from these are merged incrementally, and at exit if - * opt_stats_print is enabled. - */ - ql_head(tcache_t) tcache_ql; - - uint64_t prof_accumbytes; - - /* - * PRNG state for cache index randomization of large allocation base - * pointers. 
- */ - size_t offset_state; - - /* Extent serial number generator state. */ - size_t extent_sn_next; - - dss_prec_t dss_prec; - - /* True if a thread is currently executing arena_purge_to_limit(). */ - bool purging; - - /* Number of pages in active extents. */ - size_t nactive; - - /* - * Current count of pages within unused extents that are potentially - * dirty, and for which pages_purge_*() has not been called. By - * tracking this, we can institute a limit on how much dirty unused - * memory is mapped for each arena. - */ - size_t ndirty; - - /* Decay-based purging state. */ - arena_decay_t decay; - - /* Extant large allocations. */ - ql_head(extent_t) large; - /* Synchronizes all large allocation/update/deallocation. */ - malloc_mutex_t large_mtx; - - /* - * Heaps of extents that were previously allocated. These are used when - * allocating extents, in an attempt to re-use address space. - */ - extent_heap_t extents_cached[NPSIZES+1]; - extent_heap_t extents_retained[NPSIZES+1]; - /* - * Ring sentinel used to track unused dirty memory. Dirty memory is - * managed as an LRU of cached extents. - */ - extent_t extents_dirty; - /* Protects extents_{cached,retained,dirty}. */ - malloc_mutex_t extents_mtx; - - /* - * Next extent size class in a growing series to use when satisfying a - * request via the extent hooks (only if !config_munmap). This limits - * the number of disjoint virtual memory ranges so that extent merging - * can be effective even if multiple arenas' extent allocation requests - * are highly interleaved. - */ - pszind_t extent_grow_next; - - /* Cache of extent structures that were allocated via base_alloc(). */ - ql_head(extent_t) extent_cache; - malloc_mutex_t extent_cache_mtx; - - /* bins is used to store heaps of free regions. */ - arena_bin_t bins[NBINS]; - - /* Base allocator, from which arena metadata are allocated. */ - base_t *base; -}; - -/* Used in conjunction with tsd for fast arena-related context lookup. 
*/ -struct arena_tdata_s { - ticker_t decay_ticker; -}; -#endif /* JEMALLOC_ARENA_STRUCTS_B */ - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -static const size_t large_pad = -#ifdef JEMALLOC_CACHE_OBLIVIOUS - PAGE -#else - 0 -#endif - ; - -extern ssize_t opt_decay_time; - -extern const arena_bin_info_t arena_bin_info[NBINS]; - -extent_t *arena_extent_cache_alloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, - size_t alignment, bool *zero); -void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); -void arena_extent_cache_maybe_insert(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, bool cache); -void arena_extent_cache_maybe_remove(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, bool cache); -#ifdef JEMALLOC_JET -size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); -#endif -extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, - size_t usize, size_t alignment, bool *zero); -void arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, bool locked); -void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, size_t oldsize); -void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, size_t oldsize); -ssize_t arena_decay_time_get(tsdn_t *tsdn, arena_t *arena); -bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time); -void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); -void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena); -void arena_reset(tsd_t *tsd, arena_t *arena); -void arena_destroy(tsd_t *tsd, arena_t *arena); -void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, - tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); -void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, - bool zero); -#ifdef JEMALLOC_JET -typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *); -extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; -#else -void arena_dalloc_junk_small(void *ptr, const arena_bin_info_t *bin_info); -#endif -void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, - szind_t ind, bool zero); -void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize); -void arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, - tcache_t *tcache, bool slow_path); -void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, void *ptr); -void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - void *ptr); -bool arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, - size_t oldsize, size_t size, size_t extra, bool zero); -void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, - size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); -dss_prec_t arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena); -bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec); -ssize_t arena_decay_time_default_get(void); -bool arena_decay_time_default_set(ssize_t decay_time); -void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, - unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, - size_t *ndirty); -void arena_stats_merge(tsdn_t 
*tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats); -unsigned arena_nthreads_get(arena_t *arena, bool internal); -void arena_nthreads_inc(arena_t *arena, bool internal); -void arena_nthreads_dec(arena_t *arena, bool internal); -size_t arena_extent_sn_next(arena_t *arena); -arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -void arena_boot(void); -void arena_prefork0(tsdn_t *tsdn, arena_t *arena); -void arena_prefork1(tsdn_t *tsdn, arena_t *arena); -void arena_prefork2(tsdn_t *tsdn, arena_t *arena); -void arena_prefork3(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -unsigned arena_ind_get(const arena_t *arena); -void arena_internal_add(arena_t *arena, size_t size); -void arena_internal_sub(arena_t *arena, size_t size); -size_t arena_internal_get(arena_t *arena); -bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); -szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, - const void *ptr); -void arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx); -void arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - prof_tctx_t *tctx); -void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); -void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); -void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, - bool zero, tcache_t *tcache, bool slow_path); -arena_t *arena_aalloc(tsdn_t *tsdn, const void *ptr); -size_t arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr); -void arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, - tcache_t *tcache, bool slow_path); -void arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, - tcache_t *tcache, bool slow_path); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) -# ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_INLINE unsigned -arena_ind_get(const arena_t *arena) -{ - - return (base_ind_get(arena->base)); -} - -JEMALLOC_INLINE void -arena_internal_add(arena_t *arena, size_t size) -{ - - atomic_add_zu(&arena->stats.internal, size); -} - -JEMALLOC_INLINE void -arena_internal_sub(arena_t *arena, size_t size) -{ - - atomic_sub_zu(&arena->stats.internal, size); -} - -JEMALLOC_INLINE size_t -arena_internal_get(arena_t *arena) -{ - - return (atomic_read_zu(&arena->stats.internal)); -} - -JEMALLOC_INLINE bool -arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - assert(prof_interval != 0); - - arena->prof_accumbytes += accumbytes; - if (arena->prof_accumbytes >= prof_interval) { - arena->prof_accumbytes %= prof_interval; - return (true); - } - return (false); -} - -JEMALLOC_INLINE bool -arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - - if (likely(prof_interval == 0)) - return (false); - return (arena_prof_accum_impl(arena, accumbytes)); -} - 
-JEMALLOC_INLINE bool -arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - - if (likely(prof_interval == 0)) - return (false); - - { - bool ret; - - malloc_mutex_lock(tsdn, &arena->lock); - ret = arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(tsdn, &arena->lock); - return (ret); - } -} -# endif /* JEMALLOC_ARENA_INLINE_A */ - -# ifdef JEMALLOC_ARENA_INLINE_B -JEMALLOC_INLINE szind_t -arena_bin_index(arena_t *arena, arena_bin_t *bin) -{ - szind_t binind = (szind_t)(bin - arena->bins); - assert(binind < NBINS); - return (binind); -} - -JEMALLOC_INLINE prof_tctx_t * -arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) -{ - - cassert(config_prof); - assert(ptr != NULL); - - if (unlikely(!extent_slab_get(extent))) - return (large_prof_tctx_get(tsdn, extent)); - return ((prof_tctx_t *)(uintptr_t)1U); -} - -JEMALLOC_INLINE void -arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - - if (unlikely(!extent_slab_get(extent))) - large_prof_tctx_set(tsdn, extent, tctx); -} - -JEMALLOC_INLINE void -arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - prof_tctx_t *tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - assert(!extent_slab_get(extent)); - - large_prof_tctx_reset(tsdn, extent); -} - -JEMALLOC_ALWAYS_INLINE void -arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) -{ - tsd_t *tsd; - ticker_t *decay_ticker; - - if (unlikely(tsdn_null(tsdn))) - return; - tsd = tsdn_tsd(tsdn); - decay_ticker = decay_ticker_get(tsd, arena_ind_get(arena)); - if (unlikely(decay_ticker == NULL)) - return; - if (unlikely(ticker_ticks(decay_ticker, nticks))) - arena_purge(tsdn, arena, false); -} - -JEMALLOC_ALWAYS_INLINE void -arena_decay_tick(tsdn_t *tsdn, arena_t *arena) -{ - - malloc_mutex_assert_not_owner(tsdn, &arena->lock); - - arena_decay_ticks(tsdn, arena, 1); -} - -JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, - tcache_t *tcache, bool slow_path) -{ - - assert(!tsdn_null(tsdn) || tcache == NULL); - assert(size != 0); - - if (likely(tcache != NULL)) { - if (likely(size <= SMALL_MAXCLASS)) { - return (tcache_alloc_small(tsdn_tsd(tsdn), arena, - tcache, size, ind, zero, slow_path)); - } - if (likely(size <= tcache_maxclass)) { - return (tcache_alloc_large(tsdn_tsd(tsdn), arena, - tcache, size, ind, zero, slow_path)); - } - /* (size > tcache_maxclass) case falls through. */ - assert(size > tcache_maxclass); - } - - return (arena_malloc_hard(tsdn, arena, size, ind, zero)); -} - -JEMALLOC_ALWAYS_INLINE arena_t * -arena_aalloc(tsdn_t *tsdn, const void *ptr) -{ - - return (extent_arena_get(iealloc(tsdn, ptr))); -} - -/* Return the size of the allocation pointed to by ptr. */ -JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) -{ - size_t ret; - - assert(ptr != NULL); - - if (likely(extent_slab_get(extent))) - ret = index2size(extent_slab_data_get_const(extent)->binind); - else - ret = large_salloc(tsdn, extent); - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, - bool slow_path) -{ - - assert(!tsdn_null(tsdn) || tcache == NULL); - assert(ptr != NULL); - - if (likely(extent_slab_get(extent))) { - /* Small allocation. 
*/ - if (likely(tcache != NULL)) { - szind_t binind = extent_slab_data_get(extent)->binind; - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, - slow_path); - } else { - arena_dalloc_small(tsdn, extent_arena_get(extent), - extent, ptr); - } - } else { - size_t usize = extent_usize_get(extent); - - if (likely(tcache != NULL) && usize <= tcache_maxclass) { - if (config_prof && unlikely(usize <= SMALL_MAXCLASS)) { - arena_dalloc_promoted(tsdn, extent, ptr, - tcache, slow_path); - } else { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, - ptr, usize, slow_path); - } - } else - large_dalloc(tsdn, extent); - } -} - -JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, - tcache_t *tcache, bool slow_path) -{ - - assert(!tsdn_null(tsdn) || tcache == NULL); - assert(ptr != NULL); - - if (likely(extent_slab_get(extent))) { - /* Small allocation. */ - if (likely(tcache != NULL)) { - szind_t binind = size2index(size); - assert(binind == extent_slab_data_get(extent)->binind); - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, - slow_path); - } else { - arena_dalloc_small(tsdn, extent_arena_get(extent), - extent, ptr); - } - } else { - if (likely(tcache != NULL) && size <= tcache_maxclass) { - if (config_prof && unlikely(size <= SMALL_MAXCLASS)) { - arena_dalloc_promoted(tsdn, extent, ptr, - tcache, slow_path); - } else { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - size, slow_path); - } - } else - large_dalloc(tsdn, extent); - } -} -# endif /* JEMALLOC_ARENA_INLINE_B */ -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h new file mode 100644 index 0000000..ecc8230 --- /dev/null +++ b/include/jemalloc/internal/arena_externs.h @@ -0,0 +1,92 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#define JEMALLOC_INTERNAL_ARENA_EXTERNS_H + +static const size_t large_pad = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + PAGE +#else + 0 +#endif + ; + +extern ssize_t opt_decay_time; + +extern const arena_bin_info_t arena_bin_info[NBINS]; + +extent_t *arena_extent_cache_alloc(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, + size_t alignment, bool *zero); +void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); +void arena_extent_cache_maybe_insert(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, bool cache); +void arena_extent_cache_maybe_remove(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, bool cache); +#ifdef JEMALLOC_JET +size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); +#endif +extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, + size_t usize, size_t alignment, bool *zero); +void arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, bool locked); +void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, size_t oldsize); +void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, size_t oldsize); +ssize_t arena_decay_time_get(tsdn_t *tsdn, arena_t *arena); +bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time); +void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); +void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena); +void arena_reset(tsd_t *tsd, arena_t *arena); +void arena_destroy(tsd_t *tsd, arena_t *arena); +void 
arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, + tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); +void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, + bool zero); +#ifdef JEMALLOC_JET +typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *); +extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; +#else +void arena_dalloc_junk_small(void *ptr, const arena_bin_info_t *bin_info); +#endif +void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, + szind_t ind, bool zero); +void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache); +void arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize); +void arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, + tcache_t *tcache, bool slow_path); +void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, void *ptr); +void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + void *ptr); +bool arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, + size_t oldsize, size_t size, size_t extra, bool zero); +void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, + size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); +dss_prec_t arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena); +bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec); +ssize_t arena_decay_time_default_get(void); +bool arena_decay_time_default_set(ssize_t decay_time); +void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, + unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, + size_t *ndirty); +void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats); +unsigned arena_nthreads_get(arena_t *arena, bool internal); +void arena_nthreads_inc(arena_t *arena, bool internal); +void arena_nthreads_dec(arena_t *arena, bool internal); +size_t arena_extent_sn_next(arena_t *arena); +arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +void arena_boot(void); +void arena_prefork0(tsdn_t *tsdn, arena_t *arena); +void arena_prefork1(tsdn_t *tsdn, arena_t *arena); +void arena_prefork2(tsdn_t *tsdn, arena_t *arena); +void arena_prefork3(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); + +#endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */ diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h new file mode 100644 index 0000000..743727b --- /dev/null +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -0,0 +1,91 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_A_H +#define JEMALLOC_INTERNAL_ARENA_INLINES_A_H + +#ifndef JEMALLOC_ENABLE_INLINE +unsigned arena_ind_get(const arena_t *arena); +void arena_internal_add(arena_t *arena, size_t size); +void arena_internal_sub(arena_t *arena, size_t size); +size_t arena_internal_get(arena_t *arena); +bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); +#endif /* JEMALLOC_ENABLE_INLINE */ + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) + +JEMALLOC_INLINE 
unsigned +arena_ind_get(const arena_t *arena) +{ + + return (base_ind_get(arena->base)); +} + +JEMALLOC_INLINE void +arena_internal_add(arena_t *arena, size_t size) +{ + + atomic_add_zu(&arena->stats.internal, size); +} + +JEMALLOC_INLINE void +arena_internal_sub(arena_t *arena, size_t size) +{ + + atomic_sub_zu(&arena->stats.internal, size); +} + +JEMALLOC_INLINE size_t +arena_internal_get(arena_t *arena) +{ + + return (atomic_read_zu(&arena->stats.internal)); +} + +JEMALLOC_INLINE bool +arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + assert(prof_interval != 0); + + arena->prof_accumbytes += accumbytes; + if (arena->prof_accumbytes >= prof_interval) { + arena->prof_accumbytes %= prof_interval; + return (true); + } + return (false); +} + +JEMALLOC_INLINE bool +arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + + if (likely(prof_interval == 0)) + return (false); + return (arena_prof_accum_impl(arena, accumbytes)); +} + +JEMALLOC_INLINE bool +arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + + if (likely(prof_interval == 0)) + return (false); + + { + bool ret; + + malloc_mutex_lock(tsdn, &arena->lock); + ret = arena_prof_accum_impl(arena, accumbytes); + malloc_mutex_unlock(tsdn, &arena->lock); + return (ret); + } +} + +#endif /* (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) */ + +#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_A_H */ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h new file mode 100644 index 0000000..9068cf4 --- /dev/null +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -0,0 +1,209 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H +#define JEMALLOC_INTERNAL_ARENA_INLINES_B_H + +#ifndef JEMALLOC_ENABLE_INLINE +szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, + const void *ptr); +void arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx); +void arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, + prof_tctx_t *tctx); +void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); +void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); +void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, + bool zero, tcache_t *tcache, bool slow_path); +arena_t *arena_aalloc(tsdn_t *tsdn, const void *ptr); +size_t arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr); +void arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, + tcache_t *tcache, bool slow_path); +void arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, + tcache_t *tcache, bool slow_path); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +JEMALLOC_INLINE szind_t +arena_bin_index(arena_t *arena, arena_bin_t *bin) +{ + szind_t binind = (szind_t)(bin - arena->bins); + assert(binind < NBINS); + return (binind); +} + +JEMALLOC_INLINE prof_tctx_t * +arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) +{ + + cassert(config_prof); + assert(ptr != NULL); + + if (unlikely(!extent_slab_get(extent))) + return (large_prof_tctx_get(tsdn, extent)); + return ((prof_tctx_t *)(uintptr_t)1U); +} + +JEMALLOC_INLINE void +arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + if 
(unlikely(!extent_slab_get(extent))) + large_prof_tctx_set(tsdn, extent, tctx); +} + +JEMALLOC_INLINE void +arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, + prof_tctx_t *tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + assert(!extent_slab_get(extent)); + + large_prof_tctx_reset(tsdn, extent); +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) +{ + tsd_t *tsd; + ticker_t *decay_ticker; + + if (unlikely(tsdn_null(tsdn))) + return; + tsd = tsdn_tsd(tsdn); + decay_ticker = decay_ticker_get(tsd, arena_ind_get(arena)); + if (unlikely(decay_ticker == NULL)) + return; + if (unlikely(ticker_ticks(decay_ticker, nticks))) + arena_purge(tsdn, arena, false); +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_tick(tsdn_t *tsdn, arena_t *arena) +{ + + malloc_mutex_assert_not_owner(tsdn, &arena->lock); + + arena_decay_ticks(tsdn, arena, 1); +} + +JEMALLOC_ALWAYS_INLINE void * +arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool slow_path) +{ + + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(size != 0); + + if (likely(tcache != NULL)) { + if (likely(size <= SMALL_MAXCLASS)) { + return (tcache_alloc_small(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path)); + } + if (likely(size <= tcache_maxclass)) { + return (tcache_alloc_large(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path)); + } + /* (size > tcache_maxclass) case falls through. */ + assert(size > tcache_maxclass); + } + + return (arena_malloc_hard(tsdn, arena, size, ind, zero)); +} + +JEMALLOC_ALWAYS_INLINE arena_t * +arena_aalloc(tsdn_t *tsdn, const void *ptr) +{ + + return (extent_arena_get(iealloc(tsdn, ptr))); +} + +/* Return the size of the allocation pointed to by ptr. */ +JEMALLOC_ALWAYS_INLINE size_t +arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) +{ + size_t ret; + + assert(ptr != NULL); + + if (likely(extent_slab_get(extent))) + ret = index2size(extent_slab_data_get_const(extent)->binind); + else + ret = large_salloc(tsdn, extent); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void +arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, + bool slow_path) +{ + + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + + if (likely(extent_slab_get(extent))) { + /* Small allocation. */ + if (likely(tcache != NULL)) { + szind_t binind = extent_slab_data_get(extent)->binind; + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, + slow_path); + } else { + arena_dalloc_small(tsdn, extent_arena_get(extent), + extent, ptr); + } + } else { + size_t usize = extent_usize_get(extent); + + if (likely(tcache != NULL) && usize <= tcache_maxclass) { + if (config_prof && unlikely(usize <= SMALL_MAXCLASS)) { + arena_dalloc_promoted(tsdn, extent, ptr, + tcache, slow_path); + } else { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, + ptr, usize, slow_path); + } + } else + large_dalloc(tsdn, extent); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, + tcache_t *tcache, bool slow_path) +{ + + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + + if (likely(extent_slab_get(extent))) { + /* Small allocation. 
*/ + if (likely(tcache != NULL)) { + szind_t binind = size2index(size); + assert(binind == extent_slab_data_get(extent)->binind); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, + slow_path); + } else { + arena_dalloc_small(tsdn, extent_arena_get(extent), + extent, ptr); + } + } else { + if (likely(tcache != NULL) && size <= tcache_maxclass) { + if (config_prof && unlikely(size <= SMALL_MAXCLASS)) { + arena_dalloc_promoted(tsdn, extent, ptr, + tcache, slow_path); + } else { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + size, slow_path); + } + } else + large_dalloc(tsdn, extent); + } +} + +#endif /* (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) */ +#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/include/jemalloc/internal/arena_structs_a.h b/include/jemalloc/internal/arena_structs_a.h new file mode 100644 index 0000000..ccb3b05 --- /dev/null +++ b/include/jemalloc/internal/arena_structs_a.h @@ -0,0 +1,15 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H +#define JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H + +struct arena_slab_data_s { + /* Index of bin this slab is associated with. */ + szind_t binind; + + /* Number of free regions in slab. */ + unsigned nfree; + + /* Per region allocated/deallocated bitmap. */ + bitmap_t bitmap[BITMAP_GROUPS_MAX]; +}; + +#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H */ diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h new file mode 100644 index 0000000..c1c2073 --- /dev/null +++ b/include/jemalloc/internal/arena_structs_b.h @@ -0,0 +1,214 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H +#define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each slab has the following layout: + * + * /--------------------\ + * | region 0 | + * |--------------------| + * | region 1 | + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | region nregs-1 | + * \--------------------/ + */ +struct arena_bin_info_s { + /* Size of regions in a slab for this bin's size class. */ + size_t reg_size; + + /* Total size of a slab for this bin's size class. */ + size_t slab_size; + + /* Total number of regions in a slab for this bin's size class. */ + uint32_t nregs; + + /* + * Metadata used to manipulate bitmaps for slabs associated with this + * bin. + */ + bitmap_info_t bitmap_info; +}; + +struct arena_decay_s { + /* + * Approximate time in seconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + ssize_t time; + /* time / SMOOTHSTEP_NSTEPS. */ + nstime_t interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t epoch; + /* Deadline randomness generator. */ + uint64_t jitter_state; + /* + * Deadline for current epoch. This is the sum of interval and per + * epoch jitter which is a uniform random variable in [0..interval). 
+ * Epochs always advance by precise multiples of interval, but we + * randomize the deadline to reduce the likelihood of arenas purging in + * lockstep. + */ + nstime_t deadline; + /* + * Number of dirty pages at beginning of current epoch. During epoch + * advancement we use the delta between arena->decay.ndirty and + * arena->ndirty to determine how many dirty pages, if any, were + * generated. + */ + size_t nunpurged; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to epoch. + */ + size_t backlog[SMOOTHSTEP_NSTEPS]; +}; + +struct arena_bin_s { + /* All operations on arena_bin_t fields require lock ownership. */ + malloc_mutex_t lock; + + /* + * Current slab being used to service allocations of this bin's size + * class. slabcur is independent of slabs_{nonfull,full}; whenever + * slabcur is reassigned, the previous slab must be deallocated or + * inserted into slabs_{nonfull,full}. + */ + extent_t *slabcur; + + /* + * Heap of non-full slabs. This heap is used to assure that new + * allocations come from the non-full slab that is oldest/lowest in + * memory. + */ + extent_heap_t slabs_nonfull; + + /* Ring sentinel used to track full slabs. */ + extent_t slabs_full; + + /* Bin statistics. */ + malloc_bin_stats_t stats; +}; + +struct arena_s { + /* + * Number of threads currently assigned to this arena, synchronized via + * atomic operations. Each thread has two distinct assignments, one for + * application-serving allocation, and the other for internal metadata + * allocation. Internal metadata must not be allocated from arenas + * explicitly created via the arenas.create mallctl, because the + * arena..reset mallctl indiscriminately discards all allocations for + * the affected arena. + * + * 0: Application allocation. + * 1: Internal metadata allocation. + */ + unsigned nthreads[2]; + + /* + * There are three classes of arena operations from a locking + * perspective: + * 1) Thread assignment (modifies nthreads) is synchronized via atomics. + * 2) Bin-related operations are protected by bin locks. + * 3) Extent-related operations are protected by this mutex. + */ + malloc_mutex_t lock; + + arena_stats_t stats; + /* + * List of tcaches for extant threads associated with this arena. + * Stats from these are merged incrementally, and at exit if + * opt_stats_print is enabled. + */ + ql_head(tcache_t) tcache_ql; + + uint64_t prof_accumbytes; + + /* + * PRNG state for cache index randomization of large allocation base + * pointers. + */ + size_t offset_state; + + /* Extent serial number generator state. */ + size_t extent_sn_next; + + dss_prec_t dss_prec; + + /* True if a thread is currently executing arena_purge_to_limit(). */ + bool purging; + + /* Number of pages in active extents. */ + size_t nactive; + + /* + * Current count of pages within unused extents that are potentially + * dirty, and for which pages_purge_*() has not been called. By + * tracking this, we can institute a limit on how much dirty unused + * memory is mapped for each arena. + */ + size_t ndirty; + + /* Decay-based purging state. */ + arena_decay_t decay; + + /* Extant large allocations. */ + ql_head(extent_t) large; + /* Synchronizes all large allocation/update/deallocation. */ + malloc_mutex_t large_mtx; + + /* + * Heaps of extents that were previously allocated. 
These are used when + * allocating extents, in an attempt to re-use address space. + */ + extent_heap_t extents_cached[NPSIZES+1]; + extent_heap_t extents_retained[NPSIZES+1]; + /* + * Ring sentinel used to track unused dirty memory. Dirty memory is + * managed as an LRU of cached extents. + */ + extent_t extents_dirty; + /* Protects extents_{cached,retained,dirty}. */ + malloc_mutex_t extents_mtx; + + /* + * Next extent size class in a growing series to use when satisfying a + * request via the extent hooks (only if !config_munmap). This limits + * the number of disjoint virtual memory ranges so that extent merging + * can be effective even if multiple arenas' extent allocation requests + * are highly interleaved. + */ + pszind_t extent_grow_next; + + /* Cache of extent structures that were allocated via base_alloc(). */ + ql_head(extent_t) extent_cache; + malloc_mutex_t extent_cache_mtx; + + /* bins is used to store heaps of free regions. */ + arena_bin_t bins[NBINS]; + + /* Base allocator, from which arena metadata are allocated. */ + base_t *base; +}; + +/* Used in conjunction with tsd for fast arena-related context lookup. */ +struct arena_tdata_s { + ticker_t decay_ticker; +}; + +#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h new file mode 100644 index 0000000..a13a1b6 --- /dev/null +++ b/include/jemalloc/internal/arena_types.h @@ -0,0 +1,22 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H +#define JEMALLOC_INTERNAL_ARENA_TYPES_H + +#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) + +/* Maximum number of regions in one slab. */ +#define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) +#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) + +/* Default decay time in seconds. */ +#define DECAY_TIME_DEFAULT 10 +/* Number of event ticks between time checks. */ +#define DECAY_NTICKS_PER_UPDATE 1000 + +typedef struct arena_slab_data_s arena_slab_data_t; +typedef struct arena_bin_info_s arena_bin_info_t; +typedef struct arena_decay_s arena_decay_t; +typedef struct arena_bin_s arena_bin_t; +typedef struct arena_s arena_t; +typedef struct arena_tdata_s arena_tdata_t; + +#endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h deleted file mode 100644 index 4b5b4ea..0000000 --- a/include/jemalloc/internal/atomic.h +++ /dev/null @@ -1,651 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -#define atomic_read_u64(p) atomic_add_u64(p, 0) -#endif -#define atomic_read_u32(p) atomic_add_u32(p, 0) -#define atomic_read_p(p) atomic_add_p(p, NULL) -#define atomic_read_zu(p) atomic_add_zu(p, 0) -#define atomic_read_u(p) atomic_add_u(p, 0) - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -/* - * All arithmetic functions return the arithmetic result of the atomic - * operation. Some atomic operation APIs return the value prior to mutation, in - * which case the following functions must redundantly compute the result so - * that it can be returned. 
These functions are normally inlined, so the extra - * operations can be optimized away if the return values aren't used by the - * callers. - * - * atomic_read_( *p) { return (*p); } - * atomic_add_( *p, x) { return (*p += x); } - * atomic_sub_( *p, x) { return (*p -= x); } - * bool atomic_cas_( *p, c, s) - * { - * if (*p != c) - * return (true); - * *p = s; - * return (false); - * } - * void atomic_write_( *p, x) { *p = x; } - */ - -#ifndef JEMALLOC_ENABLE_INLINE -# if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -uint64_t atomic_add_u64(uint64_t *p, uint64_t x); -uint64_t atomic_sub_u64(uint64_t *p, uint64_t x); -bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s); -void atomic_write_u64(uint64_t *p, uint64_t x); -# endif -uint32_t atomic_add_u32(uint32_t *p, uint32_t x); -uint32_t atomic_sub_u32(uint32_t *p, uint32_t x); -bool atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s); -void atomic_write_u32(uint32_t *p, uint32_t x); -void *atomic_add_p(void **p, void *x); -void *atomic_sub_p(void **p, void *x); -bool atomic_cas_p(void **p, void *c, void *s); -void atomic_write_p(void **p, const void *x); -size_t atomic_add_zu(size_t *p, size_t x); -size_t atomic_sub_zu(size_t *p, size_t x); -bool atomic_cas_zu(size_t *p, size_t c, size_t s); -void atomic_write_zu(size_t *p, size_t x); -unsigned atomic_add_u(unsigned *p, unsigned x); -unsigned atomic_sub_u(unsigned *p, unsigned x); -bool atomic_cas_u(unsigned *p, unsigned c, unsigned s); -void atomic_write_u(unsigned *p, unsigned x); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) -/******************************************************************************/ -/* 64-bit operations. */ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# if (defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) -{ - uint64_t t = x; - - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (t + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) -{ - uint64_t t; - - x = (uint64_t)(-(int64_t)x); - t = x; - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (t + x); -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) -{ - uint8_t success; - - asm volatile ( - "lock; cmpxchgq %4, %0;" - "sete %1;" - : "=m" (*p), "=a" (success) /* Outputs. */ - : "m" (*p), "a" (c), "r" (s) /* Inputs. */ - : "memory" /* Clobbers. */ - ); - - return (!(bool)success); -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) -{ - - asm volatile ( - "xchgq %1, %0;" /* Lock is implied by xchgq. */ - : "=m" (*p), "+r" (x) /* Outputs. */ - : "m" (*p) /* Inputs. */ - : "memory" /* Clobbers. 
*/ - ); -} -# elif (defined(JEMALLOC_C11ATOMICS)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) -{ - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return (atomic_fetch_add(a, x) + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) -{ - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return (atomic_fetch_sub(a, x) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) -{ - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return (!atomic_compare_exchange_strong(a, &c, s)); -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) -{ - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - atomic_store(a, x); -} -# elif (defined(JEMALLOC_ATOMIC9)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) -{ - - /* - * atomic_fetchadd_64() doesn't exist, but we only ever use this - * function on LP64 systems, so atomic_fetchadd_long() will do. - */ - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return (atomic_fetchadd_long(p, (unsigned long)x) + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) -{ - - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) -{ - - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return (!atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s)); -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) -{ - - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - atomic_store_rel_long(p, x); -} -# elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) -{ - - return (!OSAtomicCompareAndSwap64(c, s, (int64_t *)p)); -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) -{ - uint64_t o; - - /*The documented OSAtomic*() API does not expose an atomic exchange. 
*/ - do { - o = atomic_read_u64(p); - } while (atomic_cas_u64(p, o, x)); -} -# elif (defined(_MSC_VER)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) -{ - - return (InterlockedExchangeAdd64(p, x) + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) -{ - - return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) -{ - uint64_t o; - - o = InterlockedCompareExchange64(p, s, c); - return (o != c); -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) -{ - - InterlockedExchange64(p, x); -} -# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ - defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) -{ - - return (!__sync_bool_compare_and_swap(p, c, s)); -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) -{ - - __sync_lock_test_and_set(p, x); -} -# else -# error "Missing implementation for 64-bit atomic operations" -# endif -#endif - -/******************************************************************************/ -/* 32-bit operations. */ -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) -{ - uint32_t t = x; - - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (t + x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) -{ - uint32_t t; - - x = (uint32_t)(-(int32_t)x); - t = x; - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (t + x); -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) -{ - uint8_t success; - - asm volatile ( - "lock; cmpxchgl %4, %0;" - "sete %1;" - : "=m" (*p), "=a" (success) /* Outputs. */ - : "m" (*p), "a" (c), "r" (s) /* Inputs. */ - : "memory" - ); - - return (!(bool)success); -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) -{ - - asm volatile ( - "xchgl %1, %0;" /* Lock is implied by xchgl. */ - : "=m" (*p), "+r" (x) /* Outputs. */ - : "m" (*p) /* Inputs. */ - : "memory" /* Clobbers. 
*/ - ); -} -# elif (defined(JEMALLOC_C11ATOMICS)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) -{ - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return (atomic_fetch_add(a, x) + x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) -{ - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return (atomic_fetch_sub(a, x) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) -{ - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return (!atomic_compare_exchange_strong(a, &c, s)); -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) -{ - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - atomic_store(a, x); -} -#elif (defined(JEMALLOC_ATOMIC9)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) -{ - - return (atomic_fetchadd_32(p, x) + x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) -{ - - return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) -{ - - return (!atomic_cmpset_32(p, c, s)); -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) -{ - - atomic_store_rel_32(p, x); -} -#elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) -{ - - return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p)); -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) -{ - uint32_t o; - - /*The documented OSAtomic*() API does not expose an atomic exchange. */ - do { - o = atomic_read_u32(p); - } while (atomic_cas_u32(p, o, x)); -} -#elif (defined(_MSC_VER)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) -{ - - return (InterlockedExchangeAdd(p, x) + x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) -{ - - return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) -{ - uint32_t o; - - o = InterlockedCompareExchange(p, s, c); - return (o != c); -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) -{ - - InterlockedExchange(p, x); -} -#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ - defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) -{ - - return (!__sync_bool_compare_and_swap(p, c, s)); -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) -{ - - __sync_lock_test_and_set(p, x); -} -#else -# error "Missing implementation for 32-bit atomic operations" -#endif - -/******************************************************************************/ -/* Pointer operations. 
*/ -JEMALLOC_INLINE void * -atomic_add_p(void **p, void *x) -{ - -#if (LG_SIZEOF_PTR == 3) - return ((void *)atomic_add_u64((uint64_t *)p, (uint64_t)x)); -#elif (LG_SIZEOF_PTR == 2) - return ((void *)atomic_add_u32((uint32_t *)p, (uint32_t)x)); -#endif -} - -JEMALLOC_INLINE void * -atomic_sub_p(void **p, void *x) -{ - -#if (LG_SIZEOF_PTR == 3) - return ((void *)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); -#elif (LG_SIZEOF_PTR == 2) - return ((void *)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x))); -#endif -} - -JEMALLOC_INLINE bool -atomic_cas_p(void **p, void *c, void *s) -{ - -#if (LG_SIZEOF_PTR == 3) - return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); -#elif (LG_SIZEOF_PTR == 2) - return (atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); -#endif -} - -JEMALLOC_INLINE void -atomic_write_p(void **p, const void *x) -{ - -#if (LG_SIZEOF_PTR == 3) - atomic_write_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - atomic_write_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -/******************************************************************************/ -/* size_t operations. */ -JEMALLOC_INLINE size_t -atomic_add_zu(size_t *p, size_t x) -{ - -#if (LG_SIZEOF_PTR == 3) - return ((size_t)atomic_add_u64((uint64_t *)p, (uint64_t)x)); -#elif (LG_SIZEOF_PTR == 2) - return ((size_t)atomic_add_u32((uint32_t *)p, (uint32_t)x)); -#endif -} - -JEMALLOC_INLINE size_t -atomic_sub_zu(size_t *p, size_t x) -{ - -#if (LG_SIZEOF_PTR == 3) - return ((size_t)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); -#elif (LG_SIZEOF_PTR == 2) - return ((size_t)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x))); -#endif -} - -JEMALLOC_INLINE bool -atomic_cas_zu(size_t *p, size_t c, size_t s) -{ - -#if (LG_SIZEOF_PTR == 3) - return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); -#elif (LG_SIZEOF_PTR == 2) - return (atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); -#endif -} - -JEMALLOC_INLINE void -atomic_write_zu(size_t *p, size_t x) -{ - -#if (LG_SIZEOF_PTR == 3) - atomic_write_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - atomic_write_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -/******************************************************************************/ -/* unsigned operations. 
*/ -JEMALLOC_INLINE unsigned -atomic_add_u(unsigned *p, unsigned x) -{ - -#if (LG_SIZEOF_INT == 3) - return ((unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)x)); -#elif (LG_SIZEOF_INT == 2) - return ((unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)x)); -#endif -} - -JEMALLOC_INLINE unsigned -atomic_sub_u(unsigned *p, unsigned x) -{ - -#if (LG_SIZEOF_INT == 3) - return ((unsigned)atomic_add_u64((uint64_t *)p, - (uint64_t)-((int64_t)x))); -#elif (LG_SIZEOF_INT == 2) - return ((unsigned)atomic_add_u32((uint32_t *)p, - (uint32_t)-((int32_t)x))); -#endif -} - -JEMALLOC_INLINE bool -atomic_cas_u(unsigned *p, unsigned c, unsigned s) -{ - -#if (LG_SIZEOF_INT == 3) - return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); -#elif (LG_SIZEOF_INT == 2) - return (atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); -#endif -} - -JEMALLOC_INLINE void -atomic_write_u(unsigned *p, unsigned x) -{ - -#if (LG_SIZEOF_INT == 3) - atomic_write_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_INT == 2) - atomic_write_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -/******************************************************************************/ -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/atomic_externs.h b/include/jemalloc/internal/atomic_externs.h new file mode 100644 index 0000000..002aebc --- /dev/null +++ b/include/jemalloc/internal/atomic_externs.h @@ -0,0 +1,12 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H +#define JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H + +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +#define atomic_read_u64(p) atomic_add_u64(p, 0) +#endif +#define atomic_read_u32(p) atomic_add_u32(p, 0) +#define atomic_read_p(p) atomic_add_p(p, NULL) +#define atomic_read_zu(p) atomic_add_zu(p, 0) +#define atomic_read_u(p) atomic_add_u(p, 0) + +#endif /* JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H */ diff --git a/include/jemalloc/internal/atomic_inlines.h b/include/jemalloc/internal/atomic_inlines.h new file mode 100644 index 0000000..de0ac6a --- /dev/null +++ b/include/jemalloc/internal/atomic_inlines.h @@ -0,0 +1,629 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_INLINES_H +#define JEMALLOC_INTERNAL_ATOMIC_INLINES_H + +/* + * All arithmetic functions return the arithmetic result of the atomic + * operation. Some atomic operation APIs return the value prior to mutation, in + * which case the following functions must redundantly compute the result so + * that it can be returned. These functions are normally inlined, so the extra + * operations can be optimized away if the return values aren't used by the + * callers. 
+ * + * atomic_read_( *p) { return (*p); } + * atomic_add_( *p, x) { return (*p += x); } + * atomic_sub_( *p, x) { return (*p -= x); } + * bool atomic_cas_( *p, c, s) + * { + * if (*p != c) + * return (true); + * *p = s; + * return (false); + * } + * void atomic_write_( *p, x) { *p = x; } + */ + +#ifndef JEMALLOC_ENABLE_INLINE +# if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +uint64_t atomic_add_u64(uint64_t *p, uint64_t x); +uint64_t atomic_sub_u64(uint64_t *p, uint64_t x); +bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s); +void atomic_write_u64(uint64_t *p, uint64_t x); +# endif +uint32_t atomic_add_u32(uint32_t *p, uint32_t x); +uint32_t atomic_sub_u32(uint32_t *p, uint32_t x); +bool atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s); +void atomic_write_u32(uint32_t *p, uint32_t x); +void *atomic_add_p(void **p, void *x); +void *atomic_sub_p(void **p, void *x); +bool atomic_cas_p(void **p, void *c, void *s); +void atomic_write_p(void **p, const void *x); +size_t atomic_add_zu(size_t *p, size_t x); +size_t atomic_sub_zu(size_t *p, size_t x); +bool atomic_cas_zu(size_t *p, size_t c, size_t s); +void atomic_write_zu(size_t *p, size_t x); +unsigned atomic_add_u(unsigned *p, unsigned x); +unsigned atomic_sub_u(unsigned *p, unsigned x); +bool atomic_cas_u(unsigned *p, unsigned c, unsigned s); +void atomic_write_u(unsigned *p, unsigned x); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) +/******************************************************************************/ +/* 64-bit operations. */ +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# if (defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE uint64_t +atomic_add_u64(uint64_t *p, uint64_t x) +{ + uint64_t t = x; + + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_u64(uint64_t *p, uint64_t x) +{ + uint64_t t; + + x = (uint64_t)(-(int64_t)x); + t = x; + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); +} + +JEMALLOC_INLINE bool +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) +{ + uint8_t success; + + asm volatile ( + "lock; cmpxchgq %4, %0;" + "sete %1;" + : "=m" (*p), "=a" (success) /* Outputs. */ + : "m" (*p), "a" (c), "r" (s) /* Inputs. */ + : "memory" /* Clobbers. */ + ); + + return (!(bool)success); +} + +JEMALLOC_INLINE void +atomic_write_u64(uint64_t *p, uint64_t x) +{ + + asm volatile ( + "xchgq %1, %0;" /* Lock is implied by xchgq. */ + : "=m" (*p), "+r" (x) /* Outputs. */ + : "m" (*p) /* Inputs. */ + : "memory" /* Clobbers. 
*/ + ); +} +# elif (defined(JEMALLOC_C11ATOMICS)) +JEMALLOC_INLINE uint64_t +atomic_add_u64(uint64_t *p, uint64_t x) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (atomic_fetch_add(a, x) + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_u64(uint64_t *p, uint64_t x) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (atomic_fetch_sub(a, x) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (!atomic_compare_exchange_strong(a, &c, s)); +} + +JEMALLOC_INLINE void +atomic_write_u64(uint64_t *p, uint64_t x) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + atomic_store(a, x); +} +# elif (defined(JEMALLOC_ATOMIC9)) +JEMALLOC_INLINE uint64_t +atomic_add_u64(uint64_t *p, uint64_t x) +{ + + /* + * atomic_fetchadd_64() doesn't exist, but we only ever use this + * function on LP64 systems, so atomic_fetchadd_long() will do. + */ + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)x) + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_u64(uint64_t *p, uint64_t x) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (!atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s)); +} + +JEMALLOC_INLINE void +atomic_write_u64(uint64_t *p, uint64_t x) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + atomic_store_rel_long(p, x); +} +# elif (defined(JEMALLOC_OSATOMIC)) +JEMALLOC_INLINE uint64_t +atomic_add_u64(uint64_t *p, uint64_t x) +{ + + return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_u64(uint64_t *p, uint64_t x) +{ + + return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); +} + +JEMALLOC_INLINE bool +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) +{ + + return (!OSAtomicCompareAndSwap64(c, s, (int64_t *)p)); +} + +JEMALLOC_INLINE void +atomic_write_u64(uint64_t *p, uint64_t x) +{ + uint64_t o; + + /*The documented OSAtomic*() API does not expose an atomic exchange. 
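+ * Emulate one with a read/CAS loop: re-read the current value and retry
+ * the CAS until no other thread has intervened.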
*/ + do { + o = atomic_read_u64(p); + } while (atomic_cas_u64(p, o, x)); +} +# elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint64_t +atomic_add_u64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, x) + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_u64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) +{ + uint64_t o; + + o = InterlockedCompareExchange64(p, s, c); + return (o != c); +} + +JEMALLOC_INLINE void +atomic_write_u64(uint64_t *p, uint64_t x) +{ + + InterlockedExchange64(p, x); +} +# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ + defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) +JEMALLOC_INLINE uint64_t +atomic_add_u64(uint64_t *p, uint64_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_u64(uint64_t *p, uint64_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} + +JEMALLOC_INLINE bool +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) +{ + + return (!__sync_bool_compare_and_swap(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_u64(uint64_t *p, uint64_t x) +{ + + __sync_lock_test_and_set(p, x); +} +# else +# error "Missing implementation for 64-bit atomic operations" +# endif +#endif + +/******************************************************************************/ +/* 32-bit operations. */ +#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE uint32_t +atomic_add_u32(uint32_t *p, uint32_t x) +{ + uint32_t t = x; + + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_u32(uint32_t *p, uint32_t x) +{ + uint32_t t; + + x = (uint32_t)(-(int32_t)x); + t = x; + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); +} + +JEMALLOC_INLINE bool +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) +{ + uint8_t success; + + asm volatile ( + "lock; cmpxchgl %4, %0;" + "sete %1;" + : "=m" (*p), "=a" (success) /* Outputs. */ + : "m" (*p), "a" (c), "r" (s) /* Inputs. */ + : "memory" + ); + + return (!(bool)success); +} + +JEMALLOC_INLINE void +atomic_write_u32(uint32_t *p, uint32_t x) +{ + + asm volatile ( + "xchgl %1, %0;" /* Lock is implied by xchgl. */ + : "=m" (*p), "+r" (x) /* Outputs. */ + : "m" (*p) /* Inputs. */ + : "memory" /* Clobbers. 
*/ + ); +} +# elif (defined(JEMALLOC_C11ATOMICS)) +JEMALLOC_INLINE uint32_t +atomic_add_u32(uint32_t *p, uint32_t x) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (atomic_fetch_add(a, x) + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_u32(uint32_t *p, uint32_t x) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (atomic_fetch_sub(a, x) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (!atomic_compare_exchange_strong(a, &c, s)); +} + +JEMALLOC_INLINE void +atomic_write_u32(uint32_t *p, uint32_t x) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + atomic_store(a, x); +} +#elif (defined(JEMALLOC_ATOMIC9)) +JEMALLOC_INLINE uint32_t +atomic_add_u32(uint32_t *p, uint32_t x) +{ + + return (atomic_fetchadd_32(p, x) + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_u32(uint32_t *p, uint32_t x) +{ + + return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!atomic_cmpset_32(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_u32(uint32_t *p, uint32_t x) +{ + + atomic_store_rel_32(p, x); +} +#elif (defined(JEMALLOC_OSATOMIC)) +JEMALLOC_INLINE uint32_t +atomic_add_u32(uint32_t *p, uint32_t x) +{ + + return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_u32(uint32_t *p, uint32_t x) +{ + + return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); +} + +JEMALLOC_INLINE bool +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p)); +} + +JEMALLOC_INLINE void +atomic_write_u32(uint32_t *p, uint32_t x) +{ + uint32_t o; + + /*The documented OSAtomic*() API does not expose an atomic exchange. */ + do { + o = atomic_read_u32(p); + } while (atomic_cas_u32(p, o, x)); +} +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint32_t +atomic_add_u32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, x) + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_u32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) +{ + uint32_t o; + + o = InterlockedCompareExchange(p, s, c); + return (o != c); +} + +JEMALLOC_INLINE void +atomic_write_u32(uint32_t *p, uint32_t x) +{ + + InterlockedExchange(p, x); +} +#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ + defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) +JEMALLOC_INLINE uint32_t +atomic_add_u32(uint32_t *p, uint32_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_u32(uint32_t *p, uint32_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} + +JEMALLOC_INLINE bool +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!__sync_bool_compare_and_swap(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_u32(uint32_t *p, uint32_t x) +{ + + __sync_lock_test_and_set(p, x); +} +#else +# error "Missing implementation for 32-bit atomic operations" +#endif + +/******************************************************************************/ +/* Pointer operations. 
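+ * These forward to the 64- or 32-bit operations above, selected at compile
+ * time by LG_SIZEOF_PTR.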
*/ +JEMALLOC_INLINE void * +atomic_add_p(void **p, void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((void *)atomic_add_u64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_PTR == 2) + return ((void *)atomic_add_u32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE void * +atomic_sub_p(void **p, void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((void *)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_PTR == 2) + return ((void *)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x))); +#endif +} + +JEMALLOC_INLINE bool +atomic_cas_p(void **p, void *c, void *s) +{ + +#if (LG_SIZEOF_PTR == 3) + return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_PTR == 2) + return (atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_p(void **p, const void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + atomic_write_u64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_PTR == 2) + atomic_write_u32((uint32_t *)p, (uint32_t)x); +#endif +} + +/******************************************************************************/ +/* size_t operations. */ +JEMALLOC_INLINE size_t +atomic_add_zu(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((size_t)atomic_add_u64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_PTR == 2) + return ((size_t)atomic_add_u32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE size_t +atomic_sub_zu(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((size_t)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_PTR == 2) + return ((size_t)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x))); +#endif +} + +JEMALLOC_INLINE bool +atomic_cas_zu(size_t *p, size_t c, size_t s) +{ + +#if (LG_SIZEOF_PTR == 3) + return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_PTR == 2) + return (atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_zu(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + atomic_write_u64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_PTR == 2) + atomic_write_u32((uint32_t *)p, (uint32_t)x); +#endif +} + +/******************************************************************************/ +/* unsigned operations. 
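+ * Like the pointer and size_t variants, these forward to the 64- or 32-bit
+ * operations above, selected at compile time by LG_SIZEOF_INT.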
*/ +JEMALLOC_INLINE unsigned +atomic_add_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + return ((unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_INT == 2) + return ((unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE unsigned +atomic_sub_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + return ((unsigned)atomic_add_u64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_INT == 2) + return ((unsigned)atomic_add_u32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} + +JEMALLOC_INLINE bool +atomic_cas_u(unsigned *p, unsigned c, unsigned s) +{ + +#if (LG_SIZEOF_INT == 3) + return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_INT == 2) + return (atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + atomic_write_u64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_INT == 2) + atomic_write_u32((uint32_t *)p, (uint32_t)x); +#endif +} + +/******************************************************************************/ +#endif +#endif /* JEMALLOC_INTERNAL_ATOMIC_INLINES_H */ diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h deleted file mode 100644 index a54a550..0000000 --- a/include/jemalloc/internal/base.h +++ /dev/null @@ -1,87 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct base_block_s base_block_t; -typedef struct base_s base_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* Embedded at the beginning of every block of base-managed virtual memory. */ -struct base_block_s { - /* Total size of block's virtual memory mapping. */ - size_t size; - - /* Next block in list of base's blocks. */ - base_block_t *next; - - /* Tracks unused trailing space. */ - extent_t extent; -}; - -struct base_s { - /* Associated arena's index within the arenas array. */ - unsigned ind; - - /* User-configurable extent hook functions. */ - union { - extent_hooks_t *extent_hooks; - void *extent_hooks_pun; - }; - - /* Protects base_alloc() and base_stats_get() operations. */ - malloc_mutex_t mtx; - - /* Serial number generation state. */ - size_t extent_sn_next; - - /* Chain of all blocks associated with base. */ - base_block_t *blocks; - - /* Heap of extents that track unused trailing space within blocks. */ - extent_heap_t avail[NSIZES]; - - /* Stats, only maintained if config_stats. 
*/ - size_t allocated; - size_t resident; - size_t mapped; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -base_t *b0get(void); -base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -void base_delete(base_t *base); -extent_hooks_t *base_extent_hooks_get(base_t *base); -extent_hooks_t *base_extent_hooks_set(base_t *base, - extent_hooks_t *extent_hooks); -void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); -void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, - size_t *resident, size_t *mapped); -void base_prefork(tsdn_t *tsdn, base_t *base); -void base_postfork_parent(tsdn_t *tsdn, base_t *base); -void base_postfork_child(tsdn_t *tsdn, base_t *base); -bool base_boot(tsdn_t *tsdn); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -unsigned base_ind_get(const base_t *base); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BASE_C_)) -JEMALLOC_INLINE unsigned -base_ind_get(const base_t *base) -{ - - return (base->ind); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h new file mode 100644 index 0000000..2c555cf --- /dev/null +++ b/include/jemalloc/internal/base_externs.h @@ -0,0 +1,18 @@ +#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H +#define JEMALLOC_INTERNAL_BASE_EXTERNS_H + +base_t *b0get(void); +base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +void base_delete(base_t *base); +extent_hooks_t *base_extent_hooks_get(base_t *base); +extent_hooks_t *base_extent_hooks_set(base_t *base, + extent_hooks_t *extent_hooks); +void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); +void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, + size_t *resident, size_t *mapped); +void base_prefork(tsdn_t *tsdn, base_t *base); +void base_postfork_parent(tsdn_t *tsdn, base_t *base); +void base_postfork_child(tsdn_t *tsdn, base_t *base); +bool base_boot(tsdn_t *tsdn); + +#endif /* JEMALLOC_INTERNAL_BASE_EXTERNS_H */ diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h new file mode 100644 index 0000000..f882bcd --- /dev/null +++ b/include/jemalloc/internal/base_inlines.h @@ -0,0 +1,17 @@ +#ifndef JEMALLOC_INTERNAL_BASE_INLINES_H +#define JEMALLOC_INTERNAL_BASE_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +unsigned base_ind_get(const base_t *base); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BASE_C_)) +JEMALLOC_INLINE unsigned +base_ind_get(const base_t *base) +{ + + return (base->ind); +} +#endif + +#endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */ diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h new file mode 100644 index 0000000..bad37c0 --- /dev/null +++ b/include/jemalloc/internal/base_structs.h @@ -0,0 +1,44 @@ +#ifndef JEMALLOC_INTERNAL_BASE_STRUCTS_H +#define JEMALLOC_INTERNAL_BASE_STRUCTS_H + +/* Embedded at the beginning of every block of base-managed virtual memory. */ +struct base_block_s { + /* Total size of block's virtual memory mapping. */ + size_t size; + + /* Next block in list of base's blocks. 
*/ + base_block_t *next; + + /* Tracks unused trailing space. */ + extent_t extent; +}; + +struct base_s { + /* Associated arena's index within the arenas array. */ + unsigned ind; + + /* User-configurable extent hook functions. */ + union { + extent_hooks_t *extent_hooks; + void *extent_hooks_pun; + }; + + /* Protects base_alloc() and base_stats_get() operations. */ + malloc_mutex_t mtx; + + /* Serial number generation state. */ + size_t extent_sn_next; + + /* Chain of all blocks associated with base. */ + base_block_t *blocks; + + /* Heap of extents that track unused trailing space within blocks. */ + extent_heap_t avail[NSIZES]; + + /* Stats, only maintained if config_stats. */ + size_t allocated; + size_t resident; + size_t mapped; +}; + +#endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */ diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h new file mode 100644 index 0000000..be7ee82 --- /dev/null +++ b/include/jemalloc/internal/base_types.h @@ -0,0 +1,7 @@ +#ifndef JEMALLOC_INTERNAL_BASE_TYPES_H +#define JEMALLOC_INTERNAL_BASE_TYPES_H + +typedef struct base_block_s base_block_t; +typedef struct base_s base_t; + +#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */ diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h deleted file mode 100644 index c2e3455..0000000 --- a/include/jemalloc/internal/bitmap.h +++ /dev/null @@ -1,322 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ -#define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS -#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) - -typedef struct bitmap_level_s bitmap_level_t; -typedef struct bitmap_info_s bitmap_info_t; -typedef unsigned long bitmap_t; -#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG - -/* Number of bits per group. */ -#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) -#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) -#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) - -/* - * Do some analysis on how big the bitmap is before we use a tree. For a brute - * force linear search, if we would have to call ffs_lu() more than 2^3 times, - * use a tree instead. - */ -#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 -# define BITMAP_USE_TREE -#endif - -/* Number of groups required to store a given number of bits. */ -#define BITMAP_BITS2GROUPS(nbits) \ - (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) - -/* - * Number of groups required at a particular level for a given number of bits. - */ -#define BITMAP_GROUPS_L0(nbits) \ - BITMAP_BITS2GROUPS(nbits) -#define BITMAP_GROUPS_L1(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) -#define BITMAP_GROUPS_L2(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) -#define BITMAP_GROUPS_L3(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ - BITMAP_BITS2GROUPS((nbits))))) -#define BITMAP_GROUPS_L4(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) - -/* - * Assuming the number of levels, number of groups required for a given number - * of bits. 
- */ -#define BITMAP_GROUPS_1_LEVEL(nbits) \ - BITMAP_GROUPS_L0(nbits) -#define BITMAP_GROUPS_2_LEVEL(nbits) \ - (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) -#define BITMAP_GROUPS_3_LEVEL(nbits) \ - (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) -#define BITMAP_GROUPS_4_LEVEL(nbits) \ - (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) -#define BITMAP_GROUPS_5_LEVEL(nbits) \ - (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) - -/* - * Maximum number of groups required to support LG_BITMAP_MAXBITS. - */ -#ifdef BITMAP_USE_TREE - -#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) -#else -# error "Unsupported bitmap size" -#endif - -/* - * Maximum number of levels possible. This could be statically computed based - * on LG_BITMAP_MAXBITS: - * - * #define BITMAP_MAX_LEVELS \ - * (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ - * + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) - * - * However, that would not allow the generic BITMAP_INFO_INITIALIZER() macro, so - * instead hardcode BITMAP_MAX_LEVELS to the largest number supported by the - * various cascading macros. The only additional cost this incurs is some - * unused trailing entries in bitmap_info_t structures; the bitmaps themselves - * are not impacted. - */ -#define BITMAP_MAX_LEVELS 5 - -#define BITMAP_INFO_INITIALIZER(nbits) { \ - /* nbits. */ \ - nbits, \ - /* nlevels. */ \ - (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \ - (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \ - (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \ - (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \ - /* levels. */ \ - { \ - {0}, \ - {BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \ - BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \ - BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \ - BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \ - + BITMAP_GROUPS_L0(nbits)} \ - } \ -} - -#else /* BITMAP_USE_TREE */ - -#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) - -#define BITMAP_INFO_INITIALIZER(nbits) { \ - /* nbits. */ \ - nbits, \ - /* ngroups. */ \ - BITMAP_BITS2GROUPS(nbits) \ -} - -#endif /* BITMAP_USE_TREE */ - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct bitmap_level_s { - /* Offset of this level's groups within the array of groups. */ - size_t group_offset; -}; - -struct bitmap_info_s { - /* Logical number of bits in bitmap (stored at bottom level). */ - size_t nbits; - -#ifdef BITMAP_USE_TREE - /* Number of levels necessary for nbits. */ - unsigned nlevels; - - /* - * Only the first (nlevels+1) elements are used, and levels are ordered - * bottom to top (e.g. the bottom level is stored in levels[0]). 
- */ - bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; -#else /* BITMAP_USE_TREE */ - /* Number of groups necessary for nbits. */ - size_t ngroups; -#endif /* BITMAP_USE_TREE */ -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); -size_t bitmap_size(const bitmap_info_t *binfo); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); -bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); -void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) -JEMALLOC_INLINE bool -bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ -#ifdef BITMAP_USE_TREE - size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; - bitmap_t rg = bitmap[rgoff]; - /* The bitmap is full iff the root group is 0. */ - return (rg == 0); -#else - size_t i; - - for (i = 0; i < binfo->ngroups; i++) { - if (bitmap[i] != 0) - return (false); - } - return (true); -#endif -} - -JEMALLOC_INLINE bool -bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t g; - - assert(bit < binfo->nbits); - goff = bit >> LG_BITMAP_GROUP_NBITS; - g = bitmap[goff]; - return (!(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)))); -} - -JEMALLOC_INLINE void -bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t *gp; - bitmap_t g; - - assert(bit < binfo->nbits); - assert(!bitmap_get(bitmap, binfo, bit)); - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[goff]; - g = *gp; - assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - assert(bitmap_get(bitmap, binfo, bit)); -#ifdef BITMAP_USE_TREE - /* Propagate group state transitions up the tree. */ - if (g == 0) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (g != 0) - break; - } - } -#endif -} - -/* sfu: set first unset. 
*/ -JEMALLOC_INLINE size_t -bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - size_t bit; - bitmap_t g; - unsigned i; - - assert(!bitmap_full(bitmap, binfo)); - -#ifdef BITMAP_USE_TREE - i = binfo->nlevels - 1; - g = bitmap[binfo->levels[i].group_offset]; - bit = ffs_lu(g) - 1; - while (i > 0) { - i--; - g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); - } -#else - i = 0; - g = bitmap[0]; - while ((bit = ffs_lu(g)) == 0) { - i++; - g = bitmap[i]; - } - bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); -#endif - bitmap_set(bitmap, binfo, bit); - return (bit); -} - -JEMALLOC_INLINE void -bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t *gp; - bitmap_t g; - UNUSED bool propagate; - - assert(bit < binfo->nbits); - assert(bitmap_get(bitmap, binfo, bit)); - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[goff]; - g = *gp; - propagate = (g == 0); - assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - assert(!bitmap_get(bitmap, binfo, bit)); -#ifdef BITMAP_USE_TREE - /* Propagate group state transitions up the tree. */ - if (propagate) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - propagate = (g == 0); - assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) - == 0); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (!propagate) - break; - } - } -#endif /* BITMAP_USE_TREE */ -} - -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/bitmap_externs.h b/include/jemalloc/internal/bitmap_externs.h new file mode 100644 index 0000000..4df63eb --- /dev/null +++ b/include/jemalloc/internal/bitmap_externs.h @@ -0,0 +1,8 @@ +#ifndef JEMALLOC_INTERNAL_BITMAP_EXTERNS_H +#define JEMALLOC_INTERNAL_BITMAP_EXTERNS_H + +void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); +void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); +size_t bitmap_size(const bitmap_info_t *binfo); + +#endif /* JEMALLOC_INTERNAL_BITMAP_EXTERNS_H */ diff --git a/include/jemalloc/internal/bitmap_inlines.h b/include/jemalloc/internal/bitmap_inlines.h new file mode 100644 index 0000000..5400f9d --- /dev/null +++ b/include/jemalloc/internal/bitmap_inlines.h @@ -0,0 +1,152 @@ +#ifndef JEMALLOC_INTERNAL_BITMAP_INLINES_H +#define JEMALLOC_INTERNAL_BITMAP_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); +bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); +void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) +JEMALLOC_INLINE bool +bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ +#ifdef BITMAP_USE_TREE + size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; + bitmap_t rg = bitmap[rgoff]; + /* The bitmap is full iff the root group is 0. 
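+ * (A stored 1 bit means the logical bit is unset; a group's summary bit one
+ * level up is cleared once the group becomes all zeros.)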
*/ + return (rg == 0); +#else + size_t i; + + for (i = 0; i < binfo->ngroups; i++) { + if (bitmap[i] != 0) + return (false); + } + return (true); +#endif +} + +JEMALLOC_INLINE bool +bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t g; + + assert(bit < binfo->nbits); + goff = bit >> LG_BITMAP_GROUP_NBITS; + g = bitmap[goff]; + return (!(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)))); +} + +JEMALLOC_INLINE void +bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t *gp; + bitmap_t g; + + assert(bit < binfo->nbits); + assert(!bitmap_get(bitmap, binfo, bit)); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(bitmap_get(bitmap, binfo, bit)); +#ifdef BITMAP_USE_TREE + /* Propagate group state transitions up the tree. */ + if (g == 0) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (g != 0) + break; + } + } +#endif +} + +/* sfu: set first unset. */ +JEMALLOC_INLINE size_t +bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t bit; + bitmap_t g; + unsigned i; + + assert(!bitmap_full(bitmap, binfo)); + +#ifdef BITMAP_USE_TREE + i = binfo->nlevels - 1; + g = bitmap[binfo->levels[i].group_offset]; + bit = ffs_lu(g) - 1; + while (i > 0) { + i--; + g = bitmap[binfo->levels[i].group_offset + bit]; + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); + } +#else + i = 0; + g = bitmap[0]; + while ((bit = ffs_lu(g)) == 0) { + i++; + g = bitmap[i]; + } + bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); +#endif + bitmap_set(bitmap, binfo, bit); + return (bit); +} + +JEMALLOC_INLINE void +bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t *gp; + bitmap_t g; + UNUSED bool propagate; + + assert(bit < binfo->nbits); + assert(bitmap_get(bitmap, binfo, bit)); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + propagate = (g == 0); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(!bitmap_get(bitmap, binfo, bit)); +#ifdef BITMAP_USE_TREE + /* Propagate group state transitions up the tree. */ + if (propagate) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + propagate = (g == 0); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) + == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (!propagate) + break; + } + } +#endif /* BITMAP_USE_TREE */ +} + +#endif + +#endif /* JEMALLOC_INTERNAL_BITMAP_INLINES_H */ diff --git a/include/jemalloc/internal/bitmap_structs.h b/include/jemalloc/internal/bitmap_structs.h new file mode 100644 index 0000000..297ae66 --- /dev/null +++ b/include/jemalloc/internal/bitmap_structs.h @@ -0,0 +1,28 @@ +#ifndef JEMALLOC_INTERNAL_BITMAP_STRUCTS_H +#define JEMALLOC_INTERNAL_BITMAP_STRUCTS_H + +struct bitmap_level_s { + /* Offset of this level's groups within the array of groups. 
*/ + size_t group_offset; +}; + +struct bitmap_info_s { + /* Logical number of bits in bitmap (stored at bottom level). */ + size_t nbits; + +#ifdef BITMAP_USE_TREE + /* Number of levels necessary for nbits. */ + unsigned nlevels; + + /* + * Only the first (nlevels+1) elements are used, and levels are ordered + * bottom to top (e.g. the bottom level is stored in levels[0]). + */ + bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; +#else /* BITMAP_USE_TREE */ + /* Number of groups necessary for nbits. */ + size_t ngroups; +#endif /* BITMAP_USE_TREE */ +}; + +#endif /* JEMALLOC_INTERNAL_BITMAP_STRUCTS_H */ diff --git a/include/jemalloc/internal/bitmap_types.h b/include/jemalloc/internal/bitmap_types.h new file mode 100644 index 0000000..d823186 --- /dev/null +++ b/include/jemalloc/internal/bitmap_types.h @@ -0,0 +1,133 @@ +#ifndef JEMALLOC_INTERNAL_BITMAP_TYPES_H +#define JEMALLOC_INTERNAL_BITMAP_TYPES_H + +/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ +#define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS +#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) + +typedef struct bitmap_level_s bitmap_level_t; +typedef struct bitmap_info_s bitmap_info_t; +typedef unsigned long bitmap_t; +#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG + +/* Number of bits per group. */ +#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) +#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) + +/* + * Do some analysis on how big the bitmap is before we use a tree. For a brute + * force linear search, if we would have to call ffs_lu() more than 2^3 times, + * use a tree instead. + */ +#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 +# define BITMAP_USE_TREE +#endif + +/* Number of groups required to store a given number of bits. */ +#define BITMAP_BITS2GROUPS(nbits) \ + (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) + +/* + * Number of groups required at a particular level for a given number of bits. + */ +#define BITMAP_GROUPS_L0(nbits) \ + BITMAP_BITS2GROUPS(nbits) +#define BITMAP_GROUPS_L1(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) +#define BITMAP_GROUPS_L2(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) +#define BITMAP_GROUPS_L3(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS((nbits))))) +#define BITMAP_GROUPS_L4(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) + +/* + * Assuming the number of levels, number of groups required for a given number + * of bits. + */ +#define BITMAP_GROUPS_1_LEVEL(nbits) \ + BITMAP_GROUPS_L0(nbits) +#define BITMAP_GROUPS_2_LEVEL(nbits) \ + (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) +#define BITMAP_GROUPS_3_LEVEL(nbits) \ + (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) +#define BITMAP_GROUPS_4_LEVEL(nbits) \ + (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) +#define BITMAP_GROUPS_5_LEVEL(nbits) \ + (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) + +/* + * Maximum number of groups required to support LG_BITMAP_MAXBITS. 
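+ * For example (illustrative values only), with 64-bit groups
+ * (LG_BITMAP_GROUP_NBITS == 6) and LG_BITMAP_MAXBITS == 12, level 0 needs 64
+ * groups and level 1 needs 1, so BITMAP_GROUPS_2_LEVEL(4096) == 65.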
+ */ +#ifdef BITMAP_USE_TREE + +#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) +#else +# error "Unsupported bitmap size" +#endif + +/* + * Maximum number of levels possible. This could be statically computed based + * on LG_BITMAP_MAXBITS: + * + * #define BITMAP_MAX_LEVELS \ + * (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + * + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) + * + * However, that would not allow the generic BITMAP_INFO_INITIALIZER() macro, so + * instead hardcode BITMAP_MAX_LEVELS to the largest number supported by the + * various cascading macros. The only additional cost this incurs is some + * unused trailing entries in bitmap_info_t structures; the bitmaps themselves + * are not impacted. + */ +#define BITMAP_MAX_LEVELS 5 + +#define BITMAP_INFO_INITIALIZER(nbits) { \ + /* nbits. */ \ + nbits, \ + /* nlevels. */ \ + (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \ + (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \ + (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \ + (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \ + /* levels. */ \ + { \ + {0}, \ + {BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \ + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \ + BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \ + BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits)} \ + } \ +} + +#else /* BITMAP_USE_TREE */ + +#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) + +#define BITMAP_INFO_INITIALIZER(nbits) { \ + /* nbits. */ \ + nbits, \ + /* ngroups. */ \ + BITMAP_BITS2GROUPS(nbits) \ +} + +#endif /* BITMAP_USE_TREE */ + +#endif /* JEMALLOC_INTERNAL_BITMAP_TYPES_H */ diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h deleted file mode 100644 index f75ad90..0000000 --- a/include/jemalloc/internal/ckh.h +++ /dev/null @@ -1,86 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct ckh_s ckh_t; -typedef struct ckhc_s ckhc_t; - -/* Typedefs to allow easy function pointer passing. */ -typedef void ckh_hash_t (const void *, size_t[2]); -typedef bool ckh_keycomp_t (const void *, const void *); - -/* Maintain counters used to get an idea of performance. */ -/* #define CKH_COUNT */ -/* Print counter values in ckh_delete() (requires CKH_COUNT). */ -/* #define CKH_VERBOSE */ - -/* - * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit - * one bucket per L1 cache line. - */ -#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* Hash table cell. 
*/ -struct ckhc_s { - const void *key; - const void *data; -}; - -struct ckh_s { -#ifdef CKH_COUNT - /* Counters used to get an idea of performance. */ - uint64_t ngrows; - uint64_t nshrinks; - uint64_t nshrinkfails; - uint64_t ninserts; - uint64_t nrelocs; -#endif - - /* Used for pseudo-random number generation. */ - uint64_t prng_state; - - /* Total number of items. */ - size_t count; - - /* - * Minimum and current number of hash table buckets. There are - * 2^LG_CKH_BUCKET_CELLS cells per bucket. - */ - unsigned lg_minbuckets; - unsigned lg_curbuckets; - - /* Hash and comparison functions. */ - ckh_hash_t *hash; - ckh_keycomp_t *keycomp; - - /* Hash table with 2^lg_curbuckets buckets. */ - ckhc_t *tab; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, - ckh_keycomp_t *keycomp); -void ckh_delete(tsd_t *tsd, ckh_t *ckh); -size_t ckh_count(ckh_t *ckh); -bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, - void **data); -bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); -void ckh_string_hash(const void *key, size_t r_hash[2]); -bool ckh_string_keycomp(const void *k1, const void *k2); -void ckh_pointer_hash(const void *key, size_t r_hash[2]); -bool ckh_pointer_keycomp(const void *k1, const void *k2); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/ckh_externs.h b/include/jemalloc/internal/ckh_externs.h new file mode 100644 index 0000000..c912f72 --- /dev/null +++ b/include/jemalloc/internal/ckh_externs.h @@ -0,0 +1,18 @@ +#ifndef JEMALLOC_INTERNAL_CKH_EXTERNS_H +#define JEMALLOC_INTERNAL_CKH_EXTERNS_H + +bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, + ckh_keycomp_t *keycomp); +void ckh_delete(tsd_t *tsd, ckh_t *ckh); +size_t ckh_count(ckh_t *ckh); +bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); +bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, + void **data); +bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); +void ckh_string_hash(const void *key, size_t r_hash[2]); +bool ckh_string_keycomp(const void *k1, const void *k2); +void ckh_pointer_hash(const void *key, size_t r_hash[2]); +bool ckh_pointer_keycomp(const void *k1, const void *k2); + +#endif /* JEMALLOC_INTERNAL_CKH_EXTERNS_H */ diff --git a/include/jemalloc/internal/ckh_structs.h b/include/jemalloc/internal/ckh_structs.h new file mode 100644 index 0000000..a800cbc --- /dev/null +++ b/include/jemalloc/internal/ckh_structs.h @@ -0,0 +1,41 @@ +#ifndef JEMALLOC_INTERNAL_CKH_STRUCTS_H +#define JEMALLOC_INTERNAL_CKH_STRUCTS_H + +/* Hash table cell. */ +struct ckhc_s { + const void *key; + const void *data; +}; + +struct ckh_s { +#ifdef CKH_COUNT + /* Counters used to get an idea of performance. 
*/ + uint64_t ngrows; + uint64_t nshrinks; + uint64_t nshrinkfails; + uint64_t ninserts; + uint64_t nrelocs; +#endif + + /* Used for pseudo-random number generation. */ + uint64_t prng_state; + + /* Total number of items. */ + size_t count; + + /* + * Minimum and current number of hash table buckets. There are + * 2^LG_CKH_BUCKET_CELLS cells per bucket. + */ + unsigned lg_minbuckets; + unsigned lg_curbuckets; + + /* Hash and comparison functions. */ + ckh_hash_t *hash; + ckh_keycomp_t *keycomp; + + /* Hash table with 2^lg_curbuckets buckets. */ + ckhc_t *tab; +}; + +#endif /* JEMALLOC_INTERNAL_CKH_STRUCTS_H */ diff --git a/include/jemalloc/internal/ckh_types.h b/include/jemalloc/internal/ckh_types.h new file mode 100644 index 0000000..9a1d8d4 --- /dev/null +++ b/include/jemalloc/internal/ckh_types.h @@ -0,0 +1,22 @@ +#ifndef JEMALLOC_INTERNAL_CKH_TYPES_H +#define JEMALLOC_INTERNAL_CKH_TYPES_H + +typedef struct ckh_s ckh_t; +typedef struct ckhc_s ckhc_t; + +/* Typedefs to allow easy function pointer passing. */ +typedef void ckh_hash_t (const void *, size_t[2]); +typedef bool ckh_keycomp_t (const void *, const void *); + +/* Maintain counters used to get an idea of performance. */ +/* #define CKH_COUNT */ +/* Print counter values in ckh_delete() (requires CKH_COUNT). */ +/* #define CKH_VERBOSE */ + +/* + * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit + * one bucket per L1 cache line. + */ +#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) + +#endif /* JEMALLOC_INTERNAL_CKH_TYPES_H */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h deleted file mode 100644 index 7dc3e5b..0000000 --- a/include/jemalloc/internal/ctl.h +++ /dev/null @@ -1,127 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct ctl_node_s ctl_node_t; -typedef struct ctl_named_node_s ctl_named_node_t; -typedef struct ctl_indexed_node_s ctl_indexed_node_t; -typedef struct ctl_arena_stats_s ctl_arena_stats_t; -typedef struct ctl_stats_s ctl_stats_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct ctl_node_s { - bool named; -}; - -struct ctl_named_node_s { - struct ctl_node_s node; - const char *name; - /* If (nchildren == 0), this is a terminal node. */ - size_t nchildren; - const ctl_node_t *children; - int (*ctl)(tsd_t *, const size_t *, size_t, void *, - size_t *, void *, size_t); -}; - -struct ctl_indexed_node_s { - struct ctl_node_s node; - const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, - size_t); -}; - -struct ctl_arena_stats_s { - unsigned arena_ind; - bool initialized; - ql_elm(ctl_arena_stats_t) destroyed_link; - - unsigned nthreads; - const char *dss; - ssize_t decay_time; - size_t pactive; - size_t pdirty; - - /* The remainder are only populated if config_stats is true. */ - - arena_stats_t astats; - - /* Aggregate stats for small size classes, based on bin stats. 
*/ - size_t allocated_small; - uint64_t nmalloc_small; - uint64_t ndalloc_small; - uint64_t nrequests_small; - - malloc_bin_stats_t bstats[NBINS]; - malloc_large_stats_t lstats[NSIZES - NBINS]; -}; - -struct ctl_stats_s { - uint64_t epoch; - size_t allocated; - size_t active; - size_t metadata; - size_t resident; - size_t mapped; - size_t retained; - unsigned narenas; - ql_head(ctl_arena_stats_t) destroyed; - /* - * Element 0 contains merged stats for extant arenas (accessed via - * MALLCTL_ARENAS_ALL), element 1 contains merged stats for destroyed - * arenas (accessed via MALLCTL_ARENAS_DESTROYED), and the remaining - * MALLOCX_ARENA_MAX+1 elements correspond to arenas. - */ - ctl_arena_stats_t *arenas[MALLOCX_ARENA_MAX + 3]; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, - void *newp, size_t newlen); -int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, - size_t *miblenp); - -int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen); -bool ctl_boot(void); -void ctl_prefork(tsdn_t *tsdn); -void ctl_postfork_parent(tsdn_t *tsdn); -void ctl_postfork_child(tsdn_t *tsdn); - -#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ - if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ - != 0) { \ - malloc_printf( \ - ": Failure in xmallctl(\"%s\", ...)\n", \ - name); \ - abort(); \ - } \ -} while (0) - -#define xmallctlnametomib(name, mibp, miblenp) do { \ - if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ - malloc_printf(": Failure in " \ - "xmallctlnametomib(\"%s\", ...)\n", name); \ - abort(); \ - } \ -} while (0) - -#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ - if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ - newlen) != 0) { \ - malloc_write( \ - ": Failure in xmallctlbymib()\n"); \ - abort(); \ - } \ -} while (0) - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/include/jemalloc/internal/ctl_externs.h b/include/jemalloc/internal/ctl_externs.h new file mode 100644 index 0000000..11f77cf --- /dev/null +++ b/include/jemalloc/internal/ctl_externs.h @@ -0,0 +1,43 @@ +#ifndef JEMALLOC_INTERNAL_CTL_EXTERNS_H +#define JEMALLOC_INTERNAL_CTL_EXTERNS_H + +int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, + void *newp, size_t newlen); +int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, + size_t *miblenp); + +int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); +bool ctl_boot(void); +void ctl_prefork(tsdn_t *tsdn); +void ctl_postfork_parent(tsdn_t *tsdn); +void ctl_postfork_child(tsdn_t *tsdn); + +#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ + if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ + != 0) { \ + malloc_printf( \ + ": Failure in xmallctl(\"%s\", ...)\n", \ + name); \ + abort(); \ + } \ +} while (0) + +#define xmallctlnametomib(name, mibp, miblenp) do { \ + if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ + malloc_printf(": Failure in " \ + "xmallctlnametomib(\"%s\", ...)\n", name); \ + abort(); \ + } \ +} while (0) + +#define xmallctlbymib(mib, miblen, oldp, 
oldlenp, newp, newlen) do { \ + if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ + newlen) != 0) { \ + malloc_write( \ + ": Failure in xmallctlbymib()\n"); \ + abort(); \ + } \ +} while (0) + +#endif /* JEMALLOC_INTERNAL_CTL_EXTERNS_H */ diff --git a/include/jemalloc/internal/ctl_structs.h b/include/jemalloc/internal/ctl_structs.h new file mode 100644 index 0000000..8f94c6c --- /dev/null +++ b/include/jemalloc/internal/ctl_structs.h @@ -0,0 +1,68 @@ +#ifndef JEMALLOC_INTERNAL_CTL_STRUCTS_H +#define JEMALLOC_INTERNAL_CTL_STRUCTS_H + +struct ctl_node_s { + bool named; +}; + +struct ctl_named_node_s { + struct ctl_node_s node; + const char *name; + /* If (nchildren == 0), this is a terminal node. */ + size_t nchildren; + const ctl_node_t *children; + int (*ctl)(tsd_t *, const size_t *, size_t, void *, + size_t *, void *, size_t); +}; + +struct ctl_indexed_node_s { + struct ctl_node_s node; + const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, + size_t); +}; + +struct ctl_arena_stats_s { + unsigned arena_ind; + bool initialized; + ql_elm(ctl_arena_stats_t) destroyed_link; + + unsigned nthreads; + const char *dss; + ssize_t decay_time; + size_t pactive; + size_t pdirty; + + /* The remainder are only populated if config_stats is true. */ + + arena_stats_t astats; + + /* Aggregate stats for small size classes, based on bin stats. */ + size_t allocated_small; + uint64_t nmalloc_small; + uint64_t ndalloc_small; + uint64_t nrequests_small; + + malloc_bin_stats_t bstats[NBINS]; + malloc_large_stats_t lstats[NSIZES - NBINS]; +}; + +struct ctl_stats_s { + uint64_t epoch; + size_t allocated; + size_t active; + size_t metadata; + size_t resident; + size_t mapped; + size_t retained; + unsigned narenas; + ql_head(ctl_arena_stats_t) destroyed; + /* + * Element 0 contains merged stats for extant arenas (accessed via + * MALLCTL_ARENAS_ALL), element 1 contains merged stats for destroyed + * arenas (accessed via MALLCTL_ARENAS_DESTROYED), and the remaining + * MALLOCX_ARENA_MAX+1 elements correspond to arenas. + */ + ctl_arena_stats_t *arenas[MALLOCX_ARENA_MAX + 3]; +}; + +#endif /* JEMALLOC_INTERNAL_CTL_STRUCTS_H */ diff --git a/include/jemalloc/internal/ctl_types.h b/include/jemalloc/internal/ctl_types.h new file mode 100644 index 0000000..848c4f1 --- /dev/null +++ b/include/jemalloc/internal/ctl_types.h @@ -0,0 +1,10 @@ +#ifndef JEMALLOC_INTERNAL_CTL_TYPES_H +#define JEMALLOC_INTERNAL_CTL_TYPES_H + +typedef struct ctl_node_s ctl_node_t; +typedef struct ctl_named_node_s ctl_named_node_t; +typedef struct ctl_indexed_node_s ctl_indexed_node_t; +typedef struct ctl_arena_stats_s ctl_arena_stats_t; +typedef struct ctl_stats_s ctl_stats_t; + +#endif /* JEMALLOC_INTERNAL_CTL_TYPES_H */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h deleted file mode 100644 index 70accff..0000000 --- a/include/jemalloc/internal/extent.h +++ /dev/null @@ -1,499 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct extent_s extent_t; - -#define EXTENT_HOOKS_INITIALIZER NULL - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* Extent (span of pages). Use accessor functions for e_* fields. */ -struct extent_s { - /* Arena from which this extent came, if any. */ - arena_t *e_arena; - - /* Pointer to the extent that this structure is responsible for. */ - void *e_addr; - - /* Extent size. 
*/ - size_t e_size; - - /* - * Usable size, typically smaller than extent size due to large_pad or - * promotion of sampled small regions. - */ - size_t e_usize; - - /* - * Serial number (potentially non-unique). - * - * In principle serial numbers can wrap around on 32-bit systems if - * JEMALLOC_MUNMAP is defined, but as long as comparison functions fall - * back on address comparison for equal serial numbers, stable (if - * imperfect) ordering is maintained. - * - * Serial numbers may not be unique even in the absence of wrap-around, - * e.g. when splitting an extent and assigning the same serial number to - * both resulting adjacent extents. - */ - size_t e_sn; - - /* True if extent is active (in use). */ - bool e_active; - - /* - * The zeroed flag is used by extent recycling code to track whether - * memory is zero-filled. - */ - bool e_zeroed; - - /* - * True if physical memory is committed to the extent, whether - * explicitly or implicitly as on a system that overcommits and - * satisfies physical memory needs on demand via soft page faults. - */ - bool e_committed; - - /* - * The slab flag indicates whether the extent is used for a slab of - * small regions. This helps differentiate small size classes, and it - * indicates whether interior pointers can be looked up via iealloc(). - */ - bool e_slab; - - union { - /* Small region slab metadata. */ - arena_slab_data_t e_slab_data; - - /* Profile counters, used for large objects. */ - union { - void *e_prof_tctx_pun; - prof_tctx_t *e_prof_tctx; - }; - }; - - /* - * Linkage for arena's extents_dirty and arena_bin_t's slabs_full rings. - */ - qr(extent_t) qr_link; - - union { - /* Linkage for per size class sn/address-ordered heaps. */ - phn(extent_t) ph_link; - - /* Linkage for arena's large and extent_cache lists. 
*/ - ql_elm(extent_t) ql_link; - }; -}; -typedef ph(extent_t) extent_heap_t; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern rtree_t extents_rtree; -extern const extent_hooks_t extent_hooks_default; - -extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); -void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); - -extent_hooks_t *extent_hooks_get(arena_t *arena); -extent_hooks_t *extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks); - -#ifdef JEMALLOC_JET -typedef size_t (extent_size_quantize_t)(size_t); -extern extent_size_quantize_t *extent_size_quantize_floor; -extern extent_size_quantize_t *extent_size_quantize_ceil; -#else -size_t extent_size_quantize_floor(size_t size); -size_t extent_size_quantize_ceil(size_t size); -#endif - -ph_proto(, extent_heap_, extent_heap_t, extent_t) - -extent_t *extent_alloc_cache_locked(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab); -extent_t *extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab); -extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab); -void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -void extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); -bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); -void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); -bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length); -bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length); -bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length); -bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length); -extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, - size_t usize_a, size_t size_b, size_t usize_b); -bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b); - -bool extent_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -extent_t *extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent); -arena_t *extent_arena_get(const extent_t *extent); -void *extent_base_get(const extent_t *extent); -void *extent_addr_get(const extent_t *extent); -size_t extent_size_get(const extent_t *extent); -size_t extent_usize_get(const extent_t *extent); -void *extent_before_get(const extent_t *extent); -void *extent_last_get(const extent_t *extent); -void *extent_past_get(const extent_t *extent); -size_t extent_sn_get(const extent_t *extent); -bool 
extent_active_get(const extent_t *extent); -bool extent_retained_get(const extent_t *extent); -bool extent_zeroed_get(const extent_t *extent); -bool extent_committed_get(const extent_t *extent); -bool extent_slab_get(const extent_t *extent); -arena_slab_data_t *extent_slab_data_get(extent_t *extent); -const arena_slab_data_t *extent_slab_data_get_const(const extent_t *extent); -prof_tctx_t *extent_prof_tctx_get(const extent_t *extent); -void extent_arena_set(extent_t *extent, arena_t *arena); -void extent_addr_set(extent_t *extent, void *addr); -void extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment); -void extent_size_set(extent_t *extent, size_t size); -void extent_usize_set(extent_t *extent, size_t usize); -void extent_sn_set(extent_t *extent, size_t sn); -void extent_active_set(extent_t *extent, bool active); -void extent_zeroed_set(extent_t *extent, bool zeroed); -void extent_committed_set(extent_t *extent, bool committed); -void extent_slab_set(extent_t *extent, bool slab); -void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); -void extent_init(extent_t *extent, arena_t *arena, void *addr, - size_t size, size_t usize, size_t sn, bool active, bool zeroed, - bool committed, bool slab); -void extent_ring_insert(extent_t *sentinel, extent_t *extent); -void extent_ring_remove(extent_t *extent); -int extent_sn_comp(const extent_t *a, const extent_t *b); -int extent_ad_comp(const extent_t *a, const extent_t *b); -int extent_snad_comp(const extent_t *a, const extent_t *b); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) -JEMALLOC_INLINE extent_t * -extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent) -{ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - return (rtree_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, - dependent)); -} - -JEMALLOC_INLINE arena_t * -extent_arena_get(const extent_t *extent) -{ - - return (extent->e_arena); -} - -JEMALLOC_INLINE void * -extent_base_get(const extent_t *extent) -{ - - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || - !extent->e_slab); - return (PAGE_ADDR2BASE(extent->e_addr)); -} - -JEMALLOC_INLINE void * -extent_addr_get(const extent_t *extent) -{ - - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || - !extent->e_slab); - return (extent->e_addr); -} - -JEMALLOC_INLINE size_t -extent_size_get(const extent_t *extent) -{ - - return (extent->e_size); -} - -JEMALLOC_INLINE size_t -extent_usize_get(const extent_t *extent) -{ - - assert(!extent->e_slab); - return (extent->e_usize); -} - -JEMALLOC_INLINE void * -extent_before_get(const extent_t *extent) -{ - - return ((void *)((uintptr_t)extent_base_get(extent) - PAGE)); -} - -JEMALLOC_INLINE void * -extent_last_get(const extent_t *extent) -{ - - return ((void *)((uintptr_t)extent_base_get(extent) + - extent_size_get(extent) - PAGE)); -} - -JEMALLOC_INLINE void * -extent_past_get(const extent_t *extent) -{ - - return ((void *)((uintptr_t)extent_base_get(extent) + - extent_size_get(extent))); -} - -JEMALLOC_INLINE size_t -extent_sn_get(const extent_t *extent) -{ - - return (extent->e_sn); -} - -JEMALLOC_INLINE bool -extent_active_get(const extent_t *extent) -{ - - return (extent->e_active); -} - -JEMALLOC_INLINE bool -extent_retained_get(const extent_t *extent) -{ - - return (qr_next(extent, qr_link) == extent); -} - -JEMALLOC_INLINE bool -extent_zeroed_get(const extent_t *extent) -{ - - return (extent->e_zeroed); -} - -JEMALLOC_INLINE bool 
-extent_committed_get(const extent_t *extent) -{ - - return (extent->e_committed); -} - -JEMALLOC_INLINE bool -extent_slab_get(const extent_t *extent) -{ - - return (extent->e_slab); -} - -JEMALLOC_INLINE arena_slab_data_t * -extent_slab_data_get(extent_t *extent) -{ - - assert(extent->e_slab); - return (&extent->e_slab_data); -} - -JEMALLOC_INLINE const arena_slab_data_t * -extent_slab_data_get_const(const extent_t *extent) -{ - - assert(extent->e_slab); - return (&extent->e_slab_data); -} - -JEMALLOC_INLINE prof_tctx_t * -extent_prof_tctx_get(const extent_t *extent) -{ - - return ((prof_tctx_t *)atomic_read_p( - &((extent_t *)extent)->e_prof_tctx_pun)); -} - -JEMALLOC_INLINE void -extent_arena_set(extent_t *extent, arena_t *arena) -{ - - extent->e_arena = arena; -} - -JEMALLOC_INLINE void -extent_addr_set(extent_t *extent, void *addr) -{ - - extent->e_addr = addr; -} - -JEMALLOC_INLINE void -extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) -{ - - assert(extent_base_get(extent) == extent_addr_get(extent)); - - if (alignment < PAGE) { - unsigned lg_range = LG_PAGE - - lg_floor(CACHELINE_CEILING(alignment)); - size_t r = - prng_lg_range_zu(&extent_arena_get(extent)->offset_state, - lg_range, true); - uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - - lg_range); - extent->e_addr = (void *)((uintptr_t)extent->e_addr + - random_offset); - assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == - extent->e_addr); - } -} - -JEMALLOC_INLINE void -extent_size_set(extent_t *extent, size_t size) -{ - - extent->e_size = size; -} - -JEMALLOC_INLINE void -extent_usize_set(extent_t *extent, size_t usize) -{ - - extent->e_usize = usize; -} - -JEMALLOC_INLINE void -extent_sn_set(extent_t *extent, size_t sn) -{ - - extent->e_sn = sn; -} - -JEMALLOC_INLINE void -extent_active_set(extent_t *extent, bool active) -{ - - extent->e_active = active; -} - -JEMALLOC_INLINE void -extent_zeroed_set(extent_t *extent, bool zeroed) -{ - - extent->e_zeroed = zeroed; -} - -JEMALLOC_INLINE void -extent_committed_set(extent_t *extent, bool committed) -{ - - extent->e_committed = committed; -} - -JEMALLOC_INLINE void -extent_slab_set(extent_t *extent, bool slab) -{ - - extent->e_slab = slab; -} - -JEMALLOC_INLINE void -extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) -{ - - atomic_write_p(&extent->e_prof_tctx_pun, tctx); -} - -JEMALLOC_INLINE void -extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, - size_t usize, size_t sn, bool active, bool zeroed, bool committed, - bool slab) -{ - - assert(addr == PAGE_ADDR2BASE(addr) || !slab); - - extent_arena_set(extent, arena); - extent_addr_set(extent, addr); - extent_size_set(extent, size); - extent_usize_set(extent, usize); - extent_sn_set(extent, sn); - extent_active_set(extent, active); - extent_zeroed_set(extent, zeroed); - extent_committed_set(extent, committed); - extent_slab_set(extent, slab); - if (config_prof) - extent_prof_tctx_set(extent, NULL); - qr_new(extent, qr_link); -} - -JEMALLOC_INLINE void -extent_ring_insert(extent_t *sentinel, extent_t *extent) -{ - - qr_meld(sentinel, extent, extent_t, qr_link); -} - -JEMALLOC_INLINE void -extent_ring_remove(extent_t *extent) -{ - - qr_remove(extent, qr_link); -} - -JEMALLOC_INLINE int -extent_sn_comp(const extent_t *a, const extent_t *b) -{ - size_t a_sn = extent_sn_get(a); - size_t b_sn = extent_sn_get(b); - - return ((a_sn > b_sn) - (a_sn < b_sn)); -} - -JEMALLOC_INLINE int -extent_ad_comp(const extent_t *a, const extent_t *b) -{ - uintptr_t a_addr = 
(uintptr_t)extent_addr_get(a); - uintptr_t b_addr = (uintptr_t)extent_addr_get(b); - - return ((a_addr > b_addr) - (a_addr < b_addr)); -} - -JEMALLOC_INLINE int -extent_snad_comp(const extent_t *a, const extent_t *b) -{ - int ret; - - ret = extent_sn_comp(a, b); - if (ret != 0) - return (ret); - - ret = extent_ad_comp(a, b); - return (ret); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - -#include "jemalloc/internal/extent_dss.h" -#include "jemalloc/internal/extent_mmap.h" diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h deleted file mode 100644 index f2dac52..0000000 --- a/include/jemalloc/internal/extent_dss.h +++ /dev/null @@ -1,39 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef enum { - dss_prec_disabled = 0, - dss_prec_primary = 1, - dss_prec_secondary = 2, - - dss_prec_limit = 3 -} dss_prec_t; -#define DSS_PREC_DEFAULT dss_prec_secondary -#define DSS_DEFAULT "secondary" - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -extern const char *dss_prec_names[]; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern const char *opt_dss; - -dss_prec_t extent_dss_prec_get(void); -bool extent_dss_prec_set(dss_prec_t dss_prec); -void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit); -bool extent_in_dss(void *addr); -bool extent_dss_mergeable(void *addr_a, void *addr_b); -void extent_dss_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/extent_dss_externs.h b/include/jemalloc/internal/extent_dss_externs.h new file mode 100644 index 0000000..d376fa7 --- /dev/null +++ b/include/jemalloc/internal/extent_dss_externs.h @@ -0,0 +1,14 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_DSS_EXTERNS_H +#define JEMALLOC_INTERNAL_EXTENT_DSS_EXTERNS_H + +extern const char *opt_dss; + +dss_prec_t extent_dss_prec_get(void); +bool extent_dss_prec_set(dss_prec_t dss_prec); +void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit); +bool extent_in_dss(void *addr); +bool extent_dss_mergeable(void *addr_a, void *addr_b); +void extent_dss_boot(void); + +#endif /* JEMALLOC_INTERNAL_EXTENT_DSS_EXTERNS_H */ diff --git a/include/jemalloc/internal/extent_dss_structs.h b/include/jemalloc/internal/extent_dss_structs.h new file mode 100644 index 0000000..2d8c6f0 --- /dev/null +++ b/include/jemalloc/internal/extent_dss_structs.h @@ -0,0 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_DSS_STRUCTS_H +#define JEMALLOC_INTERNAL_EXTENT_DSS_STRUCTS_H + +extern const char *dss_prec_names[]; + +#endif /* JEMALLOC_INTERNAL_EXTENT_DSS_STRUCTS_H */ diff --git a/include/jemalloc/internal/extent_dss_types.h b/include/jemalloc/internal/extent_dss_types.h new file mode 100644 index 0000000..2839757 --- /dev/null +++ b/include/jemalloc/internal/extent_dss_types.h @@ -0,0 +1,14 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_DSS_TYPES_H +#define JEMALLOC_INTERNAL_EXTENT_DSS_TYPES_H 
+ +typedef enum { + dss_prec_disabled = 0, + dss_prec_primary = 1, + dss_prec_secondary = 2, + + dss_prec_limit = 3 +} dss_prec_t; +#define DSS_PREC_DEFAULT dss_prec_secondary +#define DSS_DEFAULT "secondary" + +#endif /* JEMALLOC_INTERNAL_EXTENT_DSS_TYPES_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h new file mode 100644 index 0000000..59f3c7c --- /dev/null +++ b/include/jemalloc/internal/extent_externs.h @@ -0,0 +1,60 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_EXTERNS_H +#define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H + +extern rtree_t extents_rtree; +extern const extent_hooks_t extent_hooks_default; + +extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); +void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); + +extent_hooks_t *extent_hooks_get(arena_t *arena); +extent_hooks_t *extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks); + +#ifdef JEMALLOC_JET +typedef size_t (extent_size_quantize_t)(size_t); +extern extent_size_quantize_t *extent_size_quantize_floor; +extern extent_size_quantize_t *extent_size_quantize_ceil; +#else +size_t extent_size_quantize_floor(size_t size); +size_t extent_size_quantize_ceil(size_t size); +#endif + +ph_proto(, extent_heap_, extent_heap_t, extent_t) + +extent_t *extent_alloc_cache_locked(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab); +extent_t *extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab); +extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab); +void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); +void extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); +bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); +void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); +bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + size_t usize_a, size_t size_b, size_t usize_b); +bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b); + +bool extent_boot(void); + +#endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */ diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h new file mode 100644 index 0000000..e48af92 --- /dev/null +++ b/include/jemalloc/internal/extent_inlines.h @@ -0,0 +1,343 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_INLINES_H +#define 
JEMALLOC_INTERNAL_EXTENT_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +extent_t *extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent); +arena_t *extent_arena_get(const extent_t *extent); +void *extent_base_get(const extent_t *extent); +void *extent_addr_get(const extent_t *extent); +size_t extent_size_get(const extent_t *extent); +size_t extent_usize_get(const extent_t *extent); +void *extent_before_get(const extent_t *extent); +void *extent_last_get(const extent_t *extent); +void *extent_past_get(const extent_t *extent); +size_t extent_sn_get(const extent_t *extent); +bool extent_active_get(const extent_t *extent); +bool extent_retained_get(const extent_t *extent); +bool extent_zeroed_get(const extent_t *extent); +bool extent_committed_get(const extent_t *extent); +bool extent_slab_get(const extent_t *extent); +arena_slab_data_t *extent_slab_data_get(extent_t *extent); +const arena_slab_data_t *extent_slab_data_get_const(const extent_t *extent); +prof_tctx_t *extent_prof_tctx_get(const extent_t *extent); +void extent_arena_set(extent_t *extent, arena_t *arena); +void extent_addr_set(extent_t *extent, void *addr); +void extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment); +void extent_size_set(extent_t *extent, size_t size); +void extent_usize_set(extent_t *extent, size_t usize); +void extent_sn_set(extent_t *extent, size_t sn); +void extent_active_set(extent_t *extent, bool active); +void extent_zeroed_set(extent_t *extent, bool zeroed); +void extent_committed_set(extent_t *extent, bool committed); +void extent_slab_set(extent_t *extent, bool slab); +void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); +void extent_init(extent_t *extent, arena_t *arena, void *addr, + size_t size, size_t usize, size_t sn, bool active, bool zeroed, + bool committed, bool slab); +void extent_ring_insert(extent_t *sentinel, extent_t *extent); +void extent_ring_remove(extent_t *extent); +int extent_sn_comp(const extent_t *a, const extent_t *b); +int extent_ad_comp(const extent_t *a, const extent_t *b); +int extent_snad_comp(const extent_t *a, const extent_t *b); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) +JEMALLOC_INLINE extent_t * +extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent) +{ + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + return (rtree_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, + dependent)); +} + +JEMALLOC_INLINE arena_t * +extent_arena_get(const extent_t *extent) +{ + + return (extent->e_arena); +} + +JEMALLOC_INLINE void * +extent_base_get(const extent_t *extent) +{ + + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent->e_slab); + return (PAGE_ADDR2BASE(extent->e_addr)); +} + +JEMALLOC_INLINE void * +extent_addr_get(const extent_t *extent) +{ + + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent->e_slab); + return (extent->e_addr); +} + +JEMALLOC_INLINE size_t +extent_size_get(const extent_t *extent) +{ + + return (extent->e_size); +} + +JEMALLOC_INLINE size_t +extent_usize_get(const extent_t *extent) +{ + + assert(!extent->e_slab); + return (extent->e_usize); +} + +JEMALLOC_INLINE void * +extent_before_get(const extent_t *extent) +{ + + return ((void *)((uintptr_t)extent_base_get(extent) - PAGE)); +} + +JEMALLOC_INLINE void * +extent_last_get(const extent_t *extent) +{ + + return ((void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent) - PAGE)); +} + +JEMALLOC_INLINE void * 
+extent_past_get(const extent_t *extent) +{ + + return ((void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent))); +} + +JEMALLOC_INLINE size_t +extent_sn_get(const extent_t *extent) +{ + + return (extent->e_sn); +} + +JEMALLOC_INLINE bool +extent_active_get(const extent_t *extent) +{ + + return (extent->e_active); +} + +JEMALLOC_INLINE bool +extent_retained_get(const extent_t *extent) +{ + + return (qr_next(extent, qr_link) == extent); +} + +JEMALLOC_INLINE bool +extent_zeroed_get(const extent_t *extent) +{ + + return (extent->e_zeroed); +} + +JEMALLOC_INLINE bool +extent_committed_get(const extent_t *extent) +{ + + return (extent->e_committed); +} + +JEMALLOC_INLINE bool +extent_slab_get(const extent_t *extent) +{ + + return (extent->e_slab); +} + +JEMALLOC_INLINE arena_slab_data_t * +extent_slab_data_get(extent_t *extent) +{ + + assert(extent->e_slab); + return (&extent->e_slab_data); +} + +JEMALLOC_INLINE const arena_slab_data_t * +extent_slab_data_get_const(const extent_t *extent) +{ + + assert(extent->e_slab); + return (&extent->e_slab_data); +} + +JEMALLOC_INLINE prof_tctx_t * +extent_prof_tctx_get(const extent_t *extent) +{ + + return ((prof_tctx_t *)atomic_read_p( + &((extent_t *)extent)->e_prof_tctx_pun)); +} + +JEMALLOC_INLINE void +extent_arena_set(extent_t *extent, arena_t *arena) +{ + + extent->e_arena = arena; +} + +JEMALLOC_INLINE void +extent_addr_set(extent_t *extent, void *addr) +{ + + extent->e_addr = addr; +} + +JEMALLOC_INLINE void +extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) +{ + + assert(extent_base_get(extent) == extent_addr_get(extent)); + + if (alignment < PAGE) { + unsigned lg_range = LG_PAGE - + lg_floor(CACHELINE_CEILING(alignment)); + size_t r = + prng_lg_range_zu(&extent_arena_get(extent)->offset_state, + lg_range, true); + uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - + lg_range); + extent->e_addr = (void *)((uintptr_t)extent->e_addr + + random_offset); + assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == + extent->e_addr); + } +} + +JEMALLOC_INLINE void +extent_size_set(extent_t *extent, size_t size) +{ + + extent->e_size = size; +} + +JEMALLOC_INLINE void +extent_usize_set(extent_t *extent, size_t usize) +{ + + extent->e_usize = usize; +} + +JEMALLOC_INLINE void +extent_sn_set(extent_t *extent, size_t sn) +{ + + extent->e_sn = sn; +} + +JEMALLOC_INLINE void +extent_active_set(extent_t *extent, bool active) +{ + + extent->e_active = active; +} + +JEMALLOC_INLINE void +extent_zeroed_set(extent_t *extent, bool zeroed) +{ + + extent->e_zeroed = zeroed; +} + +JEMALLOC_INLINE void +extent_committed_set(extent_t *extent, bool committed) +{ + + extent->e_committed = committed; +} + +JEMALLOC_INLINE void +extent_slab_set(extent_t *extent, bool slab) +{ + + extent->e_slab = slab; +} + +JEMALLOC_INLINE void +extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) +{ + + atomic_write_p(&extent->e_prof_tctx_pun, tctx); +} + +JEMALLOC_INLINE void +extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, + size_t usize, size_t sn, bool active, bool zeroed, bool committed, + bool slab) +{ + + assert(addr == PAGE_ADDR2BASE(addr) || !slab); + + extent_arena_set(extent, arena); + extent_addr_set(extent, addr); + extent_size_set(extent, size); + extent_usize_set(extent, usize); + extent_sn_set(extent, sn); + extent_active_set(extent, active); + extent_zeroed_set(extent, zeroed); + extent_committed_set(extent, committed); + extent_slab_set(extent, slab); + if (config_prof) + 
extent_prof_tctx_set(extent, NULL); + qr_new(extent, qr_link); +} + +JEMALLOC_INLINE void +extent_ring_insert(extent_t *sentinel, extent_t *extent) +{ + + qr_meld(sentinel, extent, extent_t, qr_link); +} + +JEMALLOC_INLINE void +extent_ring_remove(extent_t *extent) +{ + + qr_remove(extent, qr_link); +} + +JEMALLOC_INLINE int +extent_sn_comp(const extent_t *a, const extent_t *b) +{ + size_t a_sn = extent_sn_get(a); + size_t b_sn = extent_sn_get(b); + + return ((a_sn > b_sn) - (a_sn < b_sn)); +} + +JEMALLOC_INLINE int +extent_ad_comp(const extent_t *a, const extent_t *b) +{ + uintptr_t a_addr = (uintptr_t)extent_addr_get(a); + uintptr_t b_addr = (uintptr_t)extent_addr_get(b); + + return ((a_addr > b_addr) - (a_addr < b_addr)); +} + +JEMALLOC_INLINE int +extent_snad_comp(const extent_t *a, const extent_t *b) +{ + int ret; + + ret = extent_sn_comp(a, b); + if (ret != 0) + return (ret); + + ret = extent_ad_comp(a, b); + return (ret); +} +#endif + +#endif /* JEMALLOC_INTERNAL_EXTENT_INLINES_H */ diff --git a/include/jemalloc/internal/extent_mmap.h b/include/jemalloc/internal/extent_mmap.h deleted file mode 100644 index 3c1a788..0000000 --- a/include/jemalloc/internal/extent_mmap.h +++ /dev/null @@ -1,21 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit); -bool extent_dalloc_mmap(void *addr, size_t size); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/extent_mmap_externs.h b/include/jemalloc/internal/extent_mmap_externs.h new file mode 100644 index 0000000..5917b53 --- /dev/null +++ b/include/jemalloc/internal/extent_mmap_externs.h @@ -0,0 +1,8 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H +#define JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H + +void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit); +bool extent_dalloc_mmap(void *addr, size_t size); + +#endif /* JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H */ diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h new file mode 100644 index 0000000..de31317 --- /dev/null +++ b/include/jemalloc/internal/extent_structs.h @@ -0,0 +1,84 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_STRUCTS_H +#define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H + +/* Extent (span of pages). Use accessor functions for e_* fields. */ +struct extent_s { + /* Arena from which this extent came, if any. */ + arena_t *e_arena; + + /* Pointer to the extent that this structure is responsible for. */ + void *e_addr; + + /* Extent size. */ + size_t e_size; + + /* + * Usable size, typically smaller than extent size due to large_pad or + * promotion of sampled small regions. + */ + size_t e_usize; + + /* + * Serial number (potentially non-unique). 
+ * + * In principle serial numbers can wrap around on 32-bit systems if + * JEMALLOC_MUNMAP is defined, but as long as comparison functions fall + * back on address comparison for equal serial numbers, stable (if + * imperfect) ordering is maintained. + * + * Serial numbers may not be unique even in the absence of wrap-around, + * e.g. when splitting an extent and assigning the same serial number to + * both resulting adjacent extents. + */ + size_t e_sn; + + /* True if extent is active (in use). */ + bool e_active; + + /* + * The zeroed flag is used by extent recycling code to track whether + * memory is zero-filled. + */ + bool e_zeroed; + + /* + * True if physical memory is committed to the extent, whether + * explicitly or implicitly as on a system that overcommits and + * satisfies physical memory needs on demand via soft page faults. + */ + bool e_committed; + + /* + * The slab flag indicates whether the extent is used for a slab of + * small regions. This helps differentiate small size classes, and it + * indicates whether interior pointers can be looked up via iealloc(). + */ + bool e_slab; + + union { + /* Small region slab metadata. */ + arena_slab_data_t e_slab_data; + + /* Profile counters, used for large objects. */ + union { + void *e_prof_tctx_pun; + prof_tctx_t *e_prof_tctx; + }; + }; + + /* + * Linkage for arena's extents_dirty and arena_bin_t's slabs_full rings. + */ + qr(extent_t) qr_link; + + union { + /* Linkage for per size class sn/address-ordered heaps. */ + phn(extent_t) ph_link; + + /* Linkage for arena's large and extent_cache lists. */ + ql_elm(extent_t) ql_link; + }; +}; +typedef ph(extent_t) extent_heap_t; + +#endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */ diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h new file mode 100644 index 0000000..4873dc5 --- /dev/null +++ b/include/jemalloc/internal/extent_types.h @@ -0,0 +1,8 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_TYPES_H +#define JEMALLOC_INTERNAL_EXTENT_TYPES_H + +typedef struct extent_s extent_t; + +#define EXTENT_HOOKS_INITIALIZER NULL + +#endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h deleted file mode 100644 index 1ff2d9a..0000000 --- a/include/jemalloc/internal/hash.h +++ /dev/null @@ -1,357 +0,0 @@ -/* - * The following hash function is based on MurmurHash3, placed into the public - * domain by Austin Appleby. See https://github.com/aappleby/smhasher for - * details. 
- */ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -uint32_t hash_x86_32(const void *key, int len, uint32_t seed); -void hash_x86_128(const void *key, const int len, uint32_t seed, - uint64_t r_out[2]); -void hash_x64_128(const void *key, const int len, const uint32_t seed, - uint64_t r_out[2]); -void hash(const void *key, size_t len, const uint32_t seed, - size_t r_hash[2]); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) -/******************************************************************************/ -/* Internal implementation. */ -JEMALLOC_INLINE uint32_t -hash_rotl_32(uint32_t x, int8_t r) -{ - - return ((x << r) | (x >> (32 - r))); -} - -JEMALLOC_INLINE uint64_t -hash_rotl_64(uint64_t x, int8_t r) -{ - - return ((x << r) | (x >> (64 - r))); -} - -JEMALLOC_INLINE uint32_t -hash_get_block_32(const uint32_t *p, int i) -{ - - /* Handle unaligned read. */ - if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { - uint32_t ret; - - memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t)); - return (ret); - } - - return (p[i]); -} - -JEMALLOC_INLINE uint64_t -hash_get_block_64(const uint64_t *p, int i) -{ - - /* Handle unaligned read. */ - if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { - uint64_t ret; - - memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t)); - return (ret); - } - - return (p[i]); -} - -JEMALLOC_INLINE uint32_t -hash_fmix_32(uint32_t h) -{ - - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - - return (h); -} - -JEMALLOC_INLINE uint64_t -hash_fmix_64(uint64_t k) -{ - - k ^= k >> 33; - k *= KQU(0xff51afd7ed558ccd); - k ^= k >> 33; - k *= KQU(0xc4ceb9fe1a85ec53); - k ^= k >> 33; - - return (k); -} - -JEMALLOC_INLINE uint32_t -hash_x86_32(const void *key, int len, uint32_t seed) -{ - const uint8_t *data = (const uint8_t *) key; - const int nblocks = len / 4; - - uint32_t h1 = seed; - - const uint32_t c1 = 0xcc9e2d51; - const uint32_t c2 = 0x1b873593; - - /* body */ - { - const uint32_t *blocks = (const uint32_t *) (data + nblocks*4); - int i; - - for (i = -nblocks; i; i++) { - uint32_t k1 = hash_get_block_32(blocks, i); - - k1 *= c1; - k1 = hash_rotl_32(k1, 15); - k1 *= c2; - - h1 ^= k1; - h1 = hash_rotl_32(h1, 13); - h1 = h1*5 + 0xe6546b64; - } - } - - /* tail */ - { - const uint8_t *tail = (const uint8_t *) (data + nblocks*4); - - uint32_t k1 = 0; - - switch (len & 3) { - case 3: k1 ^= tail[2] << 16; - case 2: k1 ^= tail[1] << 8; - case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15); - k1 *= c2; h1 ^= k1; - } - } - - /* finalization */ - h1 ^= len; - - h1 = hash_fmix_32(h1); - - return (h1); -} - -UNUSED JEMALLOC_INLINE void -hash_x86_128(const void *key, const int len, uint32_t seed, - uint64_t r_out[2]) -{ - const uint8_t * data = (const uint8_t *) key; - const int nblocks = len / 16; - - uint32_t h1 = seed; - uint32_t h2 = seed; - uint32_t h3 = seed; - uint32_t h4 = seed; - - const uint32_t c1 = 0x239b961b; - const uint32_t c2 = 0xab0e9789; - const uint32_t c3 = 0x38b34ae5; - 
const uint32_t c4 = 0xa1e38b93; - - /* body */ - { - const uint32_t *blocks = (const uint32_t *) (data + nblocks*16); - int i; - - for (i = -nblocks; i; i++) { - uint32_t k1 = hash_get_block_32(blocks, i*4 + 0); - uint32_t k2 = hash_get_block_32(blocks, i*4 + 1); - uint32_t k3 = hash_get_block_32(blocks, i*4 + 2); - uint32_t k4 = hash_get_block_32(blocks, i*4 + 3); - - k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; - - h1 = hash_rotl_32(h1, 19); h1 += h2; - h1 = h1*5 + 0x561ccd1b; - - k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; - - h2 = hash_rotl_32(h2, 17); h2 += h3; - h2 = h2*5 + 0x0bcaa747; - - k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; - - h3 = hash_rotl_32(h3, 15); h3 += h4; - h3 = h3*5 + 0x96cd1c35; - - k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; - - h4 = hash_rotl_32(h4, 13); h4 += h1; - h4 = h4*5 + 0x32ac3b17; - } - } - - /* tail */ - { - const uint8_t *tail = (const uint8_t *) (data + nblocks*16); - uint32_t k1 = 0; - uint32_t k2 = 0; - uint32_t k3 = 0; - uint32_t k4 = 0; - - switch (len & 15) { - case 15: k4 ^= tail[14] << 16; - case 14: k4 ^= tail[13] << 8; - case 13: k4 ^= tail[12] << 0; - k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; - - case 12: k3 ^= tail[11] << 24; - case 11: k3 ^= tail[10] << 16; - case 10: k3 ^= tail[ 9] << 8; - case 9: k3 ^= tail[ 8] << 0; - k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; - - case 8: k2 ^= tail[ 7] << 24; - case 7: k2 ^= tail[ 6] << 16; - case 6: k2 ^= tail[ 5] << 8; - case 5: k2 ^= tail[ 4] << 0; - k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; - - case 4: k1 ^= tail[ 3] << 24; - case 3: k1 ^= tail[ 2] << 16; - case 2: k1 ^= tail[ 1] << 8; - case 1: k1 ^= tail[ 0] << 0; - k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; - } - } - - /* finalization */ - h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; - - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; - - h1 = hash_fmix_32(h1); - h2 = hash_fmix_32(h2); - h3 = hash_fmix_32(h3); - h4 = hash_fmix_32(h4); - - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; - - r_out[0] = (((uint64_t) h2) << 32) | h1; - r_out[1] = (((uint64_t) h4) << 32) | h3; -} - -UNUSED JEMALLOC_INLINE void -hash_x64_128(const void *key, const int len, const uint32_t seed, - uint64_t r_out[2]) -{ - const uint8_t *data = (const uint8_t *) key; - const int nblocks = len / 16; - - uint64_t h1 = seed; - uint64_t h2 = seed; - - const uint64_t c1 = KQU(0x87c37b91114253d5); - const uint64_t c2 = KQU(0x4cf5ad432745937f); - - /* body */ - { - const uint64_t *blocks = (const uint64_t *) (data); - int i; - - for (i = 0; i < nblocks; i++) { - uint64_t k1 = hash_get_block_64(blocks, i*2 + 0); - uint64_t k2 = hash_get_block_64(blocks, i*2 + 1); - - k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; - - h1 = hash_rotl_64(h1, 27); h1 += h2; - h1 = h1*5 + 0x52dce729; - - k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; - - h2 = hash_rotl_64(h2, 31); h2 += h1; - h2 = h2*5 + 0x38495ab5; - } - } - - /* tail */ - { - const uint8_t *tail = (const uint8_t*)(data + nblocks*16); - uint64_t k1 = 0; - uint64_t k2 = 0; - - switch (len & 15) { - case 15: k2 ^= ((uint64_t)(tail[14])) << 48; - case 14: k2 ^= ((uint64_t)(tail[13])) << 40; - case 13: k2 ^= ((uint64_t)(tail[12])) << 32; - case 12: k2 ^= ((uint64_t)(tail[11])) << 24; - case 11: k2 ^= ((uint64_t)(tail[10])) << 16; - case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; - case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; - k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; - - 
case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; - case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; - case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; - case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; - case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; - case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; - case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; - case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; - k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; - } - } - - /* finalization */ - h1 ^= len; h2 ^= len; - - h1 += h2; - h2 += h1; - - h1 = hash_fmix_64(h1); - h2 = hash_fmix_64(h2); - - h1 += h2; - h2 += h1; - - r_out[0] = h1; - r_out[1] = h2; -} - -/******************************************************************************/ -/* API. */ -JEMALLOC_INLINE void -hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) -{ - - assert(len <= INT_MAX); /* Unfortunate implementation limitation. */ - -#if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) - hash_x64_128(key, (int)len, seed, (uint64_t *)r_hash); -#else - { - uint64_t hashes[2]; - hash_x86_128(key, (int)len, seed, hashes); - r_hash[0] = (size_t)hashes[0]; - r_hash[1] = (size_t)hashes[1]; - } -#endif -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/hash_inlines.h b/include/jemalloc/internal/hash_inlines.h new file mode 100644 index 0000000..0340418 --- /dev/null +++ b/include/jemalloc/internal/hash_inlines.h @@ -0,0 +1,345 @@ +#ifndef JEMALLOC_INTERNAL_HASH_INLINES_H +#define JEMALLOC_INTERNAL_HASH_INLINES_H + +/* + * The following hash function is based on MurmurHash3, placed into the public + * domain by Austin Appleby. See https://github.com/aappleby/smhasher for + * details. + */ + +#ifndef JEMALLOC_ENABLE_INLINE +uint32_t hash_x86_32(const void *key, int len, uint32_t seed); +void hash_x86_128(const void *key, const int len, uint32_t seed, + uint64_t r_out[2]); +void hash_x64_128(const void *key, const int len, const uint32_t seed, + uint64_t r_out[2]); +void hash(const void *key, size_t len, const uint32_t seed, + size_t r_hash[2]); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) +/******************************************************************************/ +/* Internal implementation. */ +JEMALLOC_INLINE uint32_t +hash_rotl_32(uint32_t x, int8_t r) +{ + + return ((x << r) | (x >> (32 - r))); +} + +JEMALLOC_INLINE uint64_t +hash_rotl_64(uint64_t x, int8_t r) +{ + + return ((x << r) | (x >> (64 - r))); +} + +JEMALLOC_INLINE uint32_t +hash_get_block_32(const uint32_t *p, int i) +{ + + /* Handle unaligned read. */ + if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { + uint32_t ret; + + memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t)); + return (ret); + } + + return (p[i]); +} + +JEMALLOC_INLINE uint64_t +hash_get_block_64(const uint64_t *p, int i) +{ + + /* Handle unaligned read. 
*/ + if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { + uint64_t ret; + + memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t)); + return (ret); + } + + return (p[i]); +} + +JEMALLOC_INLINE uint32_t +hash_fmix_32(uint32_t h) +{ + + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return (h); +} + +JEMALLOC_INLINE uint64_t +hash_fmix_64(uint64_t k) +{ + + k ^= k >> 33; + k *= KQU(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= KQU(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return (k); +} + +JEMALLOC_INLINE uint32_t +hash_x86_32(const void *key, int len, uint32_t seed) +{ + const uint8_t *data = (const uint8_t *) key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + /* body */ + { + const uint32_t *blocks = (const uint32_t *) (data + nblocks*4); + int i; + + for (i = -nblocks; i; i++) { + uint32_t k1 = hash_get_block_32(blocks, i); + + k1 *= c1; + k1 = hash_rotl_32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = hash_rotl_32(h1, 13); + h1 = h1*5 + 0xe6546b64; + } + } + + /* tail */ + { + const uint8_t *tail = (const uint8_t *) (data + nblocks*4); + + uint32_t k1 = 0; + + switch (len & 3) { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15); + k1 *= c2; h1 ^= k1; + } + } + + /* finalization */ + h1 ^= len; + + h1 = hash_fmix_32(h1); + + return (h1); +} + +UNUSED JEMALLOC_INLINE void +hash_x86_128(const void *key, const int len, uint32_t seed, + uint64_t r_out[2]) +{ + const uint8_t * data = (const uint8_t *) key; + const int nblocks = len / 16; + + uint32_t h1 = seed; + uint32_t h2 = seed; + uint32_t h3 = seed; + uint32_t h4 = seed; + + const uint32_t c1 = 0x239b961b; + const uint32_t c2 = 0xab0e9789; + const uint32_t c3 = 0x38b34ae5; + const uint32_t c4 = 0xa1e38b93; + + /* body */ + { + const uint32_t *blocks = (const uint32_t *) (data + nblocks*16); + int i; + + for (i = -nblocks; i; i++) { + uint32_t k1 = hash_get_block_32(blocks, i*4 + 0); + uint32_t k2 = hash_get_block_32(blocks, i*4 + 1); + uint32_t k3 = hash_get_block_32(blocks, i*4 + 2); + uint32_t k4 = hash_get_block_32(blocks, i*4 + 3); + + k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; + + h1 = hash_rotl_32(h1, 19); h1 += h2; + h1 = h1*5 + 0x561ccd1b; + + k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; + + h2 = hash_rotl_32(h2, 17); h2 += h3; + h2 = h2*5 + 0x0bcaa747; + + k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; + + h3 = hash_rotl_32(h3, 15); h3 += h4; + h3 = h3*5 + 0x96cd1c35; + + k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; + + h4 = hash_rotl_32(h4, 13); h4 += h1; + h4 = h4*5 + 0x32ac3b17; + } + } + + /* tail */ + { + const uint8_t *tail = (const uint8_t *) (data + nblocks*16); + uint32_t k1 = 0; + uint32_t k2 = 0; + uint32_t k3 = 0; + uint32_t k4 = 0; + + switch (len & 15) { + case 15: k4 ^= tail[14] << 16; + case 14: k4 ^= tail[13] << 8; + case 13: k4 ^= tail[12] << 0; + k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; + + case 12: k3 ^= tail[11] << 24; + case 11: k3 ^= tail[10] << 16; + case 10: k3 ^= tail[ 9] << 8; + case 9: k3 ^= tail[ 8] << 0; + k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; + + case 8: k2 ^= tail[ 7] << 24; + case 7: k2 ^= tail[ 6] << 16; + case 6: k2 ^= tail[ 5] << 8; + case 5: k2 ^= tail[ 4] << 0; + k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; + + case 4: k1 ^= tail[ 3] << 24; + case 3: k1 ^= tail[ 2] << 16; + case 2: k1 ^= tail[ 1] << 8; + case 
1: k1 ^= tail[ 0] << 0; + k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; + } + } + + /* finalization */ + h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + h1 = hash_fmix_32(h1); + h2 = hash_fmix_32(h2); + h3 = hash_fmix_32(h3); + h4 = hash_fmix_32(h4); + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + r_out[0] = (((uint64_t) h2) << 32) | h1; + r_out[1] = (((uint64_t) h4) << 32) | h3; +} + +UNUSED JEMALLOC_INLINE void +hash_x64_128(const void *key, const int len, const uint32_t seed, + uint64_t r_out[2]) +{ + const uint8_t *data = (const uint8_t *) key; + const int nblocks = len / 16; + + uint64_t h1 = seed; + uint64_t h2 = seed; + + const uint64_t c1 = KQU(0x87c37b91114253d5); + const uint64_t c2 = KQU(0x4cf5ad432745937f); + + /* body */ + { + const uint64_t *blocks = (const uint64_t *) (data); + int i; + + for (i = 0; i < nblocks; i++) { + uint64_t k1 = hash_get_block_64(blocks, i*2 + 0); + uint64_t k2 = hash_get_block_64(blocks, i*2 + 1); + + k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; + + h1 = hash_rotl_64(h1, 27); h1 += h2; + h1 = h1*5 + 0x52dce729; + + k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; + + h2 = hash_rotl_64(h2, 31); h2 += h1; + h2 = h2*5 + 0x38495ab5; + } + } + + /* tail */ + { + const uint8_t *tail = (const uint8_t*)(data + nblocks*16); + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch (len & 15) { + case 15: k2 ^= ((uint64_t)(tail[14])) << 48; + case 14: k2 ^= ((uint64_t)(tail[13])) << 40; + case 13: k2 ^= ((uint64_t)(tail[12])) << 32; + case 12: k2 ^= ((uint64_t)(tail[11])) << 24; + case 11: k2 ^= ((uint64_t)(tail[10])) << 16; + case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; + case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; + k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; + + case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; + case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; + case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; + case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; + case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; + case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; + case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; + case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; + k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; + } + } + + /* finalization */ + h1 ^= len; h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = hash_fmix_64(h1); + h2 = hash_fmix_64(h2); + + h1 += h2; + h2 += h1; + + r_out[0] = h1; + r_out[1] = h2; +} + +/******************************************************************************/ +/* API. */ +JEMALLOC_INLINE void +hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) +{ + + assert(len <= INT_MAX); /* Unfortunate implementation limitation. 
*/ + +#if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) + hash_x64_128(key, (int)len, seed, (uint64_t *)r_hash); +#else + { + uint64_t hashes[2]; + hash_x86_128(key, (int)len, seed, hashes); + r_hash[0] = (size_t)hashes[0]; + r_hash[1] = (size_t)hashes[1]; + } +#endif +} +#endif + +#endif /* JEMALLOC_INTERNAL_HASH_INLINES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a558012..dfbb4b6 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -176,21 +176,38 @@ static const bool have_thp = /* * jemalloc can conceptually be broken into components (arena, tcache, etc.), * but there are circular dependencies that cannot be broken without - * substantial performance degradation. In order to reduce the effect on - * visual code flow, read the header files in multiple passes, with one of the - * following cpp variables defined during each pass: + * substantial performance degradation. * + * Historically, we dealt with this by splitting each header into four sections + * (types, structs, externs, and inlines), and included each header file + * multiple times in this file, picking out the portion we want on each pass + * using the following #defines: * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data * types. * JEMALLOC_H_STRUCTS : Data structures. * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. * JEMALLOC_H_INLINES : Inline functions. + * + * We're moving toward a world in which the dependencies are explicit; each file + * will #include the headers it depends on (rather than relying on them being + * implicitly available via this file including every header file in the + * project). + * + * We're now in an intermediate state: we've broken up the header files to avoid + * having to include each one multiple times, but have not yet moved the + * dependency information into the header files (i.e. we still rely on the + * ordering in this file to ensure all a header's dependencies are available in + * its translation unit). Each component is now broken up into multiple header + * files, corresponding to the sections above (e.g. instead of "tsd.h", we now + * have "tsd_types.h", "tsd_structs.h", "tsd_externs.h", "tsd_inlines.h"). */ -/******************************************************************************/ -#define JEMALLOC_H_TYPES #include "jemalloc/internal/jemalloc_internal_macros.h" +/******************************************************************************/ +/* TYPES */ +/******************************************************************************/ + /* Page size index type.
*/ typedef unsigned pszind_t; @@ -362,69 +379,57 @@ typedef unsigned szind_t; # define VARIABLE_ARRAY(type, name, count) type name[(count)] #endif -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" -#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/nstime_types.h" +#include "jemalloc/internal/util_types.h" +#include "jemalloc/internal/spin_types.h" +#include "jemalloc/internal/prng_types.h" +#include "jemalloc/internal/ticker_types.h" +#include "jemalloc/internal/ckh_types.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/witness.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/large.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/prof.h" - -#undef JEMALLOC_H_TYPES +#include "jemalloc/internal/stats_types.h" +#include "jemalloc/internal/ctl_types.h" +#include "jemalloc/internal/witness_types.h" +#include "jemalloc/internal/mutex_types.h" +#include "jemalloc/internal/tsd_types.h" +#include "jemalloc/internal/extent_types.h" +#include "jemalloc/internal/extent_dss_types.h" +#include "jemalloc/internal/base_types.h" +#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/bitmap_types.h" +#include "jemalloc/internal/rtree_types.h" +#include "jemalloc/internal/pages_types.h" +#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/prof_types.h" + + /******************************************************************************/ -#define JEMALLOC_H_STRUCTS - -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/witness.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/bitmap.h" -#define JEMALLOC_ARENA_STRUCTS_A -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_STRUCTS_A -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/base.h" -#define JEMALLOC_ARENA_STRUCTS_B -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_STRUCTS_B -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/large.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/prof.h" - -#include "jemalloc/internal/tsd.h" - -#undef JEMALLOC_H_STRUCTS +/* STRUCTS */ +/******************************************************************************/ + +#include "jemalloc/internal/nstime_structs.h" +#include "jemalloc/internal/spin_structs.h" +#include "jemalloc/internal/ticker_structs.h" +#include "jemalloc/internal/ckh_structs.h" +#include "jemalloc/internal/stats_structs.h" +#include 
"jemalloc/internal/ctl_structs.h" +#include "jemalloc/internal/witness_structs.h" +#include "jemalloc/internal/mutex_structs.h" +#include "jemalloc/internal/bitmap_structs.h" +#include "jemalloc/internal/arena_structs_a.h" +#include "jemalloc/internal/extent_structs.h" +#include "jemalloc/internal/extent_dss_structs.h" +#include "jemalloc/internal/base_structs.h" +#include "jemalloc/internal/arena_structs_b.h" +#include "jemalloc/internal/rtree_structs.h" +#include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/prof_structs.h" +#include "jemalloc/internal/tsd_structs.h" + + +/******************************************************************************/ +/* EXTERNS */ /******************************************************************************/ -#define JEMALLOC_H_EXTERNS extern bool opt_abort; extern const char *opt_junk; @@ -482,54 +487,42 @@ void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/witness.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/large.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/prof.h" -#include "jemalloc/internal/tsd.h" - -#undef JEMALLOC_H_EXTERNS +#include "jemalloc/internal/nstime_externs.h" +#include "jemalloc/internal/util_externs.h" +#include "jemalloc/internal/atomic_externs.h" +#include "jemalloc/internal/ckh_externs.h" +#include "jemalloc/internal/stats_externs.h" +#include "jemalloc/internal/ctl_externs.h" +#include "jemalloc/internal/witness_externs.h" +#include "jemalloc/internal/mutex_externs.h" +#include "jemalloc/internal/bitmap_externs.h" +#include "jemalloc/internal/extent_externs.h" +#include "jemalloc/internal/extent_dss_externs.h" +#include "jemalloc/internal/extent_mmap_externs.h" +#include "jemalloc/internal/base_externs.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/rtree_externs.h" +#include "jemalloc/internal/pages_externs.h" +#include "jemalloc/internal/large_externs.h" +#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/tsd_externs.h" + +/******************************************************************************/ +/* INLINES */ /******************************************************************************/ -#define JEMALLOC_H_INLINES - -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/tsd.h" -#include 
"jemalloc/internal/witness.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/large.h" + +#include "jemalloc/internal/util_inlines.h" +#include "jemalloc/internal/atomic_inlines.h" +#include "jemalloc/internal/spin_inlines.h" +#include "jemalloc/internal/prng_inlines.h" +#include "jemalloc/internal/ticker_inlines.h" +#include "jemalloc/internal/tsd_inlines.h" +#include "jemalloc/internal/witness_inlines.h" +#include "jemalloc/internal/mutex_inlines.h" +#include "jemalloc/internal/rtree_inlines.h" +#include "jemalloc/internal/extent_inlines.h" +#include "jemalloc/internal/base_inlines.h" #ifndef JEMALLOC_ENABLE_INLINE pszind_t psz2ind(size_t psz); @@ -925,14 +918,12 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) } #endif -#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/bitmap_inlines.h" /* - * Include portions of arena.h interleaved with tcache.h in order to resolve - * circular dependencies. + * Include portions of arena code interleaved with tcache code in order to + * resolve circular dependencies. */ -#define JEMALLOC_ARENA_INLINE_A -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/arena_inlines_a.h" #ifndef JEMALLOC_ENABLE_INLINE extent_t *iealloc(tsdn_t *tsdn, const void *ptr); @@ -947,11 +938,9 @@ iealloc(tsdn_t *tsdn, const void *ptr) } #endif -#include "jemalloc/internal/tcache.h" -#define JEMALLOC_ARENA_INLINE_B -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_B -#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/tcache_inlines.h" +#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/hash_inlines.h" #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(tsdn_t *tsdn, const void *ptr); @@ -1211,10 +1200,8 @@ ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, } #endif -#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" -#undef JEMALLOC_H_INLINES -/******************************************************************************/ #ifdef __cplusplus } diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 5749204..80820f8 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -1,3 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_MACROS_H +#define JEMALLOC_INTERNAL_MACROS_H + /* * JEMALLOC_ALWAYS_INLINE and JEMALLOC_INLINE are used within header files for * functions that are static inline functions if inlining is enabled, and @@ -55,3 +58,5 @@ #if !defined(JEMALLOC_HAS_RESTRICT) || defined(__cplusplus) # define restrict #endif + +#endif /* JEMALLOC_INTERNAL_MACROS_H */ diff --git a/include/jemalloc/internal/large.h b/include/jemalloc/internal/large.h deleted file mode 100644 index f3d382b..0000000 --- a/include/jemalloc/internal/large.h +++ /dev/null @@ -1,40 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, 
bool zero); -void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero); -bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, - size_t usize_max, bool zero); -void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - size_t usize, size_t alignment, bool zero, tcache_t *tcache); -#ifdef JEMALLOC_JET -typedef void (large_dalloc_junk_t)(void *, size_t); -extern large_dalloc_junk_t *large_dalloc_junk; -typedef void (large_dalloc_maybe_junk_t)(void *, size_t); -extern large_dalloc_maybe_junk_t *large_dalloc_maybe_junk; -#else -void large_dalloc_junk(void *ptr, size_t usize); -void large_dalloc_maybe_junk(void *ptr, size_t usize); -#endif -void large_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent); -void large_dalloc(tsdn_t *tsdn, extent_t *extent); -size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); -prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); -void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); -void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h new file mode 100644 index 0000000..f0a0339 --- /dev/null +++ b/include/jemalloc/internal/large_externs.h @@ -0,0 +1,27 @@ +#ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H +#define JEMALLOC_INTERNAL_LARGE_EXTERNS_H + +void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); +void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero); +bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, + size_t usize_max, bool zero); +void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + size_t usize, size_t alignment, bool zero, tcache_t *tcache); +#ifdef JEMALLOC_JET +typedef void (large_dalloc_junk_t)(void *, size_t); +extern large_dalloc_junk_t *large_dalloc_junk; +typedef void (large_dalloc_maybe_junk_t)(void *, size_t); +extern large_dalloc_maybe_junk_t *large_dalloc_maybe_junk; +#else +void large_dalloc_junk(void *ptr, size_t usize); +void large_dalloc_maybe_junk(void *ptr, size_t usize); +#endif +void large_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent); +void large_dalloc(tsdn_t *tsdn, extent_t *extent); +size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); +prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); +void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); +void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); + +#endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h deleted file mode 100644 index d5b3693..0000000 --- a/include/jemalloc/internal/mutex.h +++ /dev/null @@ -1,150 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct malloc_mutex_s malloc_mutex_t; - -#ifdef _WIN32 -# define MALLOC_MUTEX_INITIALIZER -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) -# define MALLOC_MUTEX_INITIALIZER \ - {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -#elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_INITIALIZER \ - {0, 
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -#elif (defined(JEMALLOC_MUTEX_INIT_CB)) -# define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_MUTEX_INITIALIZER, NULL, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -#else -# if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ - defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) -# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP -# define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -# else -# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT -# define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_MUTEX_INITIALIZER, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -# endif -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct malloc_mutex_s { -#ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - SRWLOCK lock; -# else - CRITICAL_SECTION lock; -# endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock lock; -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLock lock; -#elif (defined(JEMALLOC_MUTEX_INIT_CB)) - pthread_mutex_t lock; - malloc_mutex_t *postponed_next; -#else - pthread_mutex_t lock; -#endif - witness_t witness; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#ifdef JEMALLOC_LAZY_LOCK -extern bool isthreaded; -#else -# undef isthreaded /* Undo private_namespace.h definition. */ -# define isthreaded true -#endif - -bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, - witness_rank_t rank); -void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); -bool malloc_mutex_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) -JEMALLOC_INLINE void -malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ - - if (isthreaded) { - witness_assert_not_owner(tsdn, &mutex->witness); -#ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - AcquireSRWLockExclusive(&mutex->lock); -# else - EnterCriticalSection(&mutex->lock); -# endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock_lock(&mutex->lock); -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLockLock(&mutex->lock); -#else - pthread_mutex_lock(&mutex->lock); -#endif - witness_lock(tsdn, &mutex->witness); - } -} - -JEMALLOC_INLINE void -malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ - - if (isthreaded) { - witness_unlock(tsdn, &mutex->witness); -#ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - ReleaseSRWLockExclusive(&mutex->lock); -# else - LeaveCriticalSection(&mutex->lock); -# endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock_unlock(&mutex->lock); -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLockUnlock(&mutex->lock); -#else - pthread_mutex_unlock(&mutex->lock); -#endif - } -} - -JEMALLOC_INLINE void -malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ - 
- if (isthreaded) - witness_assert_owner(tsdn, &mutex->witness); -} - -JEMALLOC_INLINE void -malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ - - if (isthreaded) - witness_assert_not_owner(tsdn, &mutex->witness); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/mutex_externs.h b/include/jemalloc/internal/mutex_externs.h new file mode 100644 index 0000000..ba6418e --- /dev/null +++ b/include/jemalloc/internal/mutex_externs.h @@ -0,0 +1,18 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_EXTERNS_H +#define JEMALLOC_INTERNAL_MUTEX_EXTERNS_H + +#ifdef JEMALLOC_LAZY_LOCK +extern bool isthreaded; +#else +# undef isthreaded /* Undo private_namespace.h definition. */ +# define isthreaded true +#endif + +bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, + witness_rank_t rank); +void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); +bool malloc_mutex_boot(void); + +#endif /* JEMALLOC_INTERNAL_MUTEX_EXTERNS_H */ diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h new file mode 100644 index 0000000..b769f0c --- /dev/null +++ b/include/jemalloc/internal/mutex_inlines.h @@ -0,0 +1,74 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_INLINES_H +#define JEMALLOC_INTERNAL_MUTEX_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE void +malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) +{ + + if (isthreaded) { + witness_assert_not_owner(tsdn, &mutex->witness); +#ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + AcquireSRWLockExclusive(&mutex->lock); +# else + EnterCriticalSection(&mutex->lock); +# endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_lock(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLockLock(&mutex->lock); +#else + pthread_mutex_lock(&mutex->lock); +#endif + witness_lock(tsdn, &mutex->witness); + } +} + +JEMALLOC_INLINE void +malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) +{ + + if (isthreaded) { + witness_unlock(tsdn, &mutex->witness); +#ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + ReleaseSRWLockExclusive(&mutex->lock); +# else + LeaveCriticalSection(&mutex->lock); +# endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_unlock(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLockUnlock(&mutex->lock); +#else + pthread_mutex_unlock(&mutex->lock); +#endif + } +} + +JEMALLOC_INLINE void +malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) +{ + + if (isthreaded) + witness_assert_owner(tsdn, &mutex->witness); +} + +JEMALLOC_INLINE void +malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) +{ + + if (isthreaded) + witness_assert_not_owner(tsdn, &mutex->witness); +} +#endif + +#endif /* JEMALLOC_INTERNAL_MUTEX_INLINES_H */ diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h new file mode 100644 index 0000000..4a18a07 --- /dev/null +++ b/include/jemalloc/internal/mutex_structs.h @@ -0,0 +1,24 @@ +#ifndef 
JEMALLOC_INTERNAL_MUTEX_STRUCTS_H +#define JEMALLOC_INTERNAL_MUTEX_STRUCTS_H + +struct malloc_mutex_s { +#ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + SRWLOCK lock; +# else + CRITICAL_SECTION lock; +# endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLock lock; +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) + pthread_mutex_t lock; + malloc_mutex_t *postponed_next; +#else + pthread_mutex_t lock; +#endif + witness_t witness; +}; + +#endif /* JEMALLOC_INTERNAL_MUTEX_STRUCTS_H */ diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h new file mode 100644 index 0000000..8c9f249 --- /dev/null +++ b/include/jemalloc/internal/mutex_types.h @@ -0,0 +1,33 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_TYPES_H +#define JEMALLOC_INTERNAL_MUTEX_TYPES_H + +typedef struct malloc_mutex_s malloc_mutex_t; + +#ifdef _WIN32 +# define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_INITIALIZER \ + {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#elif (defined(JEMALLOC_OSSPIN)) +# define MALLOC_MUTEX_INITIALIZER \ + {0, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_MUTEX_INITIALIZER, NULL, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#else +# if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ + defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +# else +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_MUTEX_INITIALIZER, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +# endif +#endif + +#endif /* JEMALLOC_INTERNAL_MUTEX_TYPES_H */ diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h deleted file mode 100644 index 93b27dc..0000000 --- a/include/jemalloc/internal/nstime.h +++ /dev/null @@ -1,48 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct nstime_s nstime_t; - -/* Maximum supported number of seconds (~584 years). 
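For context on the "~584 years" figure: NSTIME_SEC_MAX (18446744072) equals (UINT64_MAX - 999999999) / 10^9, i.e. the largest whole-second count that, together with any sub-second nanosecond remainder, still fits in a 64-bit nanosecond counter. A minimal standalone check of that arithmetic, purely illustrative:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/*
	 * Largest sec such that sec * 10^9 + ns fits in a uint64_t for any
	 * ns in [0, 10^9).
	 */
	uint64_t sec_max = (UINT64_MAX - 999999999ULL) / 1000000000ULL;
	double years = (double)sec_max / (365.25 * 24 * 60 * 60);

	assert(sec_max == UINT64_C(18446744072));
	printf("%llu seconds ~= %.1f years\n", (unsigned long long)sec_max,
	    years);	/* prints roughly 584.5 years */
	return (0);
}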
*/ -#define NSTIME_SEC_MAX KQU(18446744072) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct nstime_s { - uint64_t ns; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void nstime_init(nstime_t *time, uint64_t ns); -void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); -uint64_t nstime_ns(const nstime_t *time); -uint64_t nstime_sec(const nstime_t *time); -uint64_t nstime_nsec(const nstime_t *time); -void nstime_copy(nstime_t *time, const nstime_t *source); -int nstime_compare(const nstime_t *a, const nstime_t *b); -void nstime_add(nstime_t *time, const nstime_t *addend); -void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); -void nstime_imultiply(nstime_t *time, uint64_t multiplier); -void nstime_idivide(nstime_t *time, uint64_t divisor); -uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); -#ifdef JEMALLOC_JET -typedef bool (nstime_monotonic_t)(void); -extern nstime_monotonic_t *nstime_monotonic; -typedef bool (nstime_update_t)(nstime_t *); -extern nstime_update_t *nstime_update; -#else -bool nstime_monotonic(void); -bool nstime_update(nstime_t *time); -#endif - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/nstime_externs.h b/include/jemalloc/internal/nstime_externs.h new file mode 100644 index 0000000..cf14ae0 --- /dev/null +++ b/include/jemalloc/internal/nstime_externs.h @@ -0,0 +1,26 @@ +#ifndef JEMALLOC_INTERNAL_NSTIME_EXTERNS_H +#define JEMALLOC_INTERNAL_NSTIME_EXTERNS_H + +void nstime_init(nstime_t *time, uint64_t ns); +void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); +uint64_t nstime_ns(const nstime_t *time); +uint64_t nstime_sec(const nstime_t *time); +uint64_t nstime_nsec(const nstime_t *time); +void nstime_copy(nstime_t *time, const nstime_t *source); +int nstime_compare(const nstime_t *a, const nstime_t *b); +void nstime_add(nstime_t *time, const nstime_t *addend); +void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); +void nstime_imultiply(nstime_t *time, uint64_t multiplier); +void nstime_idivide(nstime_t *time, uint64_t divisor); +uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); +#ifdef JEMALLOC_JET +typedef bool (nstime_monotonic_t)(void); +extern nstime_monotonic_t *nstime_monotonic; +typedef bool (nstime_update_t)(nstime_t *); +extern nstime_update_t *nstime_update; +#else +bool nstime_monotonic(void); +bool nstime_update(nstime_t *time); +#endif + +#endif /* JEMALLOC_INTERNAL_NSTIME_EXTERNS_H */ diff --git a/include/jemalloc/internal/nstime_structs.h b/include/jemalloc/internal/nstime_structs.h new file mode 100644 index 0000000..a637f61 --- /dev/null +++ b/include/jemalloc/internal/nstime_structs.h @@ -0,0 +1,8 @@ +#ifndef JEMALLOC_INTERNAL_NSTIME_STRUCTS_H +#define JEMALLOC_INTERNAL_NSTIME_STRUCTS_H + +struct nstime_s { + uint64_t ns; +}; + +#endif /* JEMALLOC_INTERNAL_NSTIME_STRUCTS_H */ diff --git a/include/jemalloc/internal/nstime_types.h b/include/jemalloc/internal/nstime_types.h new file mode 100644 index 0000000..861c5a8 --- /dev/null +++ b/include/jemalloc/internal/nstime_types.h @@ -0,0 +1,9 @@ +#ifndef 
JEMALLOC_INTERNAL_NSTIME_TYPES_H +#define JEMALLOC_INTERNAL_NSTIME_TYPES_H + +typedef struct nstime_s nstime_t; + +/* Maximum supported number of seconds (~584 years). */ +#define NSTIME_SEC_MAX KQU(18446744072) + +#endif /* JEMALLOC_INTERNAL_NSTIME_TYPES_H */ diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h deleted file mode 100644 index 98e4f38..0000000 --- a/include/jemalloc/internal/pages.h +++ /dev/null @@ -1,85 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* Page size. LG_PAGE is determined by the configure script. */ -#ifdef PAGE_MASK -# undef PAGE_MASK -#endif -#define PAGE ((size_t)(1U << LG_PAGE)) -#define PAGE_MASK ((size_t)(PAGE - 1)) -/* Return the page base address for the page containing address a. */ -#define PAGE_ADDR2BASE(a) \ - ((void *)((uintptr_t)(a) & ~PAGE_MASK)) -/* Return the smallest pagesize multiple that is >= s. */ -#define PAGE_CEILING(s) \ - (((s) + PAGE_MASK) & ~PAGE_MASK) - -/* Huge page size. LG_HUGEPAGE is determined by the configure script. */ -#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) -#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) -/* Return the huge page base address for the huge page containing address a. */ -#define HUGEPAGE_ADDR2BASE(a) \ - ((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK)) -/* Return the smallest pagesize multiple that is >= s. */ -#define HUGEPAGE_CEILING(s) \ - (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK) - -/* PAGES_CAN_PURGE_LAZY is defined if lazy purging is supported. */ -#if defined(_WIN32) || defined(JEMALLOC_PURGE_MADVISE_FREE) -# define PAGES_CAN_PURGE_LAZY -#endif -/* - * PAGES_CAN_PURGE_FORCED is defined if forced purging is supported. - * - * The only supported way to hard-purge on Windows is to decommit and then - * re-commit, but doing so is racy, and if re-commit fails it's a pain to - * propagate the "poisoned" memory state. Since we typically decommit as the - * next step after purging on Windows anyway, there's no point in adding such - * complexity. 
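The PAGE_* macros above are plain mask arithmetic. The following standalone sketch mirrors them (assuming LG_PAGE == 12, i.e. 4 KiB pages; the real value is configure-determined) and shows the rounding behavior:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define LG_PAGE		12	/* assumed; set by configure in jemalloc */
#define PAGE		((size_t)(1U << LG_PAGE))
#define PAGE_MASK	((size_t)(PAGE - 1))
/* Round an address down to the base of its containing page. */
#define PAGE_ADDR2BASE(a)	((void *)((uintptr_t)(a) & ~PAGE_MASK))
/* Round a size up to the next page-size multiple. */
#define PAGE_CEILING(s)		(((s) + PAGE_MASK) & ~PAGE_MASK)

int
main(void)
{
	assert(PAGE_CEILING((size_t)1) == 4096);
	assert(PAGE_CEILING((size_t)4096) == 4096);	/* already aligned */
	assert(PAGE_CEILING((size_t)4097) == 8192);
	assert(PAGE_ADDR2BASE((void *)(uintptr_t)0x12345) ==
	    (void *)(uintptr_t)0x12000);
	return (0);
}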
- */ -#if !defined(_WIN32) && defined(JEMALLOC_PURGE_MADVISE_DONTNEED) -# define PAGES_CAN_PURGE_FORCED -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -static const bool pages_can_purge_lazy = -#ifdef PAGES_CAN_PURGE_LAZY - true -#else - false -#endif - ; -static const bool pages_can_purge_forced = -#ifdef PAGES_CAN_PURGE_FORCED - true -#else - false -#endif - ; - -void *pages_map(void *addr, size_t size, bool *commit); -void pages_unmap(void *addr, size_t size); -void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, - size_t size, bool *commit); -bool pages_commit(void *addr, size_t size); -bool pages_decommit(void *addr, size_t size); -bool pages_purge_lazy(void *addr, size_t size); -bool pages_purge_forced(void *addr, size_t size); -bool pages_huge(void *addr, size_t size); -bool pages_nohuge(void *addr, size_t size); -void pages_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/include/jemalloc/internal/pages_externs.h b/include/jemalloc/internal/pages_externs.h new file mode 100644 index 0000000..7e34efb --- /dev/null +++ b/include/jemalloc/internal/pages_externs.h @@ -0,0 +1,31 @@ +#ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H +#define JEMALLOC_INTERNAL_PAGES_EXTERNS_H + +static const bool pages_can_purge_lazy = +#ifdef PAGES_CAN_PURGE_LAZY + true +#else + false +#endif + ; +static const bool pages_can_purge_forced = +#ifdef PAGES_CAN_PURGE_FORCED + true +#else + false +#endif + ; + +void *pages_map(void *addr, size_t size, bool *commit); +void pages_unmap(void *addr, size_t size); +void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, + size_t size, bool *commit); +bool pages_commit(void *addr, size_t size); +bool pages_decommit(void *addr, size_t size); +bool pages_purge_lazy(void *addr, size_t size); +bool pages_purge_forced(void *addr, size_t size); +bool pages_huge(void *addr, size_t size); +bool pages_nohuge(void *addr, size_t size); +void pages_boot(void); + +#endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/include/jemalloc/internal/pages_types.h b/include/jemalloc/internal/pages_types.h new file mode 100644 index 0000000..be1e245 --- /dev/null +++ b/include/jemalloc/internal/pages_types.h @@ -0,0 +1,44 @@ +#ifndef JEMALLOC_INTERNAL_PAGES_TYPES_H +#define JEMALLOC_INTERNAL_PAGES_TYPES_H + +/* Page size. LG_PAGE is determined by the configure script. */ +#ifdef PAGE_MASK +# undef PAGE_MASK +#endif +#define PAGE ((size_t)(1U << LG_PAGE)) +#define PAGE_MASK ((size_t)(PAGE - 1)) +/* Return the page base address for the page containing address a. */ +#define PAGE_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~PAGE_MASK)) +/* Return the smallest pagesize multiple that is >= s. */ +#define PAGE_CEILING(s) \ + (((s) + PAGE_MASK) & ~PAGE_MASK) + +/* Huge page size. LG_HUGEPAGE is determined by the configure script. */ +#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) +#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) +/* Return the huge page base address for the huge page containing address a. 
*/ +#define HUGEPAGE_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK)) +/* Return the smallest pagesize multiple that is >= s. */ +#define HUGEPAGE_CEILING(s) \ + (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK) + +/* PAGES_CAN_PURGE_LAZY is defined if lazy purging is supported. */ +#if defined(_WIN32) || defined(JEMALLOC_PURGE_MADVISE_FREE) +# define PAGES_CAN_PURGE_LAZY +#endif +/* + * PAGES_CAN_PURGE_FORCED is defined if forced purging is supported. + * + * The only supported way to hard-purge on Windows is to decommit and then + * re-commit, but doing so is racy, and if re-commit fails it's a pain to + * propagate the "poisoned" memory state. Since we typically decommit as the + * next step after purging on Windows anyway, there's no point in adding such + * complexity. + */ +#if !defined(_WIN32) && defined(JEMALLOC_PURGE_MADVISE_DONTNEED) +# define PAGES_CAN_PURGE_FORCED +#endif + +#endif /* JEMALLOC_INTERNAL_PAGES_TYPES_H */ diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h deleted file mode 100644 index 94fd55a..0000000 --- a/include/jemalloc/internal/prng.h +++ /dev/null @@ -1,207 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* - * Simple linear congruential pseudo-random number generator: - * - * prng(y) = (a*x + c) % m - * - * where the following constants ensure maximal period: - * - * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4. - * c == Odd number (relatively prime to 2^n). - * m == 2^32 - * - * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. - * - * This choice of m has the disadvantage that the quality of the bits is - * proportional to bit position. For example, the lowest bit has a cycle of 2, - * the next has a cycle of 4, etc. For this reason, we prefer to use the upper - * bits. 
- */ - -#define PRNG_A_32 UINT32_C(1103515241) -#define PRNG_C_32 UINT32_C(12347) - -#define PRNG_A_64 UINT64_C(6364136223846793005) -#define PRNG_C_64 UINT64_C(1442695040888963407) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -uint32_t prng_state_next_u32(uint32_t state); -uint64_t prng_state_next_u64(uint64_t state); -size_t prng_state_next_zu(size_t state); - -uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range, - bool atomic); -uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range); -size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic); - -uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic); -uint64_t prng_range_u64(uint64_t *state, uint64_t range); -size_t prng_range_zu(size_t *state, size_t range, bool atomic); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) -JEMALLOC_ALWAYS_INLINE uint32_t -prng_state_next_u32(uint32_t state) -{ - - return ((state * PRNG_A_32) + PRNG_C_32); -} - -JEMALLOC_ALWAYS_INLINE uint64_t -prng_state_next_u64(uint64_t state) -{ - - return ((state * PRNG_A_64) + PRNG_C_64); -} - -JEMALLOC_ALWAYS_INLINE size_t -prng_state_next_zu(size_t state) -{ - -#if LG_SIZEOF_PTR == 2 - return ((state * PRNG_A_32) + PRNG_C_32); -#elif LG_SIZEOF_PTR == 3 - return ((state * PRNG_A_64) + PRNG_C_64); -#else -#error Unsupported pointer size -#endif -} - -JEMALLOC_ALWAYS_INLINE uint32_t -prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) -{ - uint32_t ret, state1; - - assert(lg_range > 0); - assert(lg_range <= 32); - - if (atomic) { - uint32_t state0; - - do { - state0 = atomic_read_u32(state); - state1 = prng_state_next_u32(state0); - } while (atomic_cas_u32(state, state0, state1)); - } else { - state1 = prng_state_next_u32(*state); - *state = state1; - } - ret = state1 >> (32 - lg_range); - - return (ret); -} - -/* 64-bit atomic operations cannot be supported on all relevant platforms. */ -JEMALLOC_ALWAYS_INLINE uint64_t -prng_lg_range_u64(uint64_t *state, unsigned lg_range) -{ - uint64_t ret, state1; - - assert(lg_range > 0); - assert(lg_range <= 64); - - state1 = prng_state_next_u64(*state); - *state = state1; - ret = state1 >> (64 - lg_range); - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE size_t -prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) -{ - size_t ret, state1; - - assert(lg_range > 0); - assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR)); - - if (atomic) { - size_t state0; - - do { - state0 = atomic_read_zu(state); - state1 = prng_state_next_zu(state0); - } while (atomic_cas_zu(state, state0, state1)); - } else { - state1 = prng_state_next_zu(*state); - *state = state1; - } - ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE uint32_t -prng_range_u32(uint32_t *state, uint32_t range, bool atomic) -{ - uint32_t ret; - unsigned lg_range; - - assert(range > 1); - - /* Compute the ceiling of lg(range). */ - lg_range = ffs_u32(pow2_ceil_u32(range)) - 1; - - /* Generate a result in [0..range) via repeated trial. 
*/ - do { - ret = prng_lg_range_u32(state, lg_range, atomic); - } while (ret >= range); - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE uint64_t -prng_range_u64(uint64_t *state, uint64_t range) -{ - uint64_t ret; - unsigned lg_range; - - assert(range > 1); - - /* Compute the ceiling of lg(range). */ - lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; - - /* Generate a result in [0..range) via repeated trial. */ - do { - ret = prng_lg_range_u64(state, lg_range); - } while (ret >= range); - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE size_t -prng_range_zu(size_t *state, size_t range, bool atomic) -{ - size_t ret; - unsigned lg_range; - - assert(range > 1); - - /* Compute the ceiling of lg(range). */ - lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; - - /* Generate a result in [0..range) via repeated trial. */ - do { - ret = prng_lg_range_zu(state, lg_range, atomic); - } while (ret >= range); - - return (ret); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/prng_inlines.h b/include/jemalloc/internal/prng_inlines.h new file mode 100644 index 0000000..b82a662 --- /dev/null +++ b/include/jemalloc/internal/prng_inlines.h @@ -0,0 +1,169 @@ +#ifndef JEMALLOC_INTERNAL_PRNG_INLINES_H +#define JEMALLOC_INTERNAL_PRNG_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +uint32_t prng_state_next_u32(uint32_t state); +uint64_t prng_state_next_u64(uint64_t state); +size_t prng_state_next_zu(size_t state); + +uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range, + bool atomic); +uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range); +size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic); + +uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic); +uint64_t prng_range_u64(uint64_t *state, uint64_t range); +size_t prng_range_zu(size_t *state, size_t range, bool atomic); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) +JEMALLOC_ALWAYS_INLINE uint32_t +prng_state_next_u32(uint32_t state) +{ + + return ((state * PRNG_A_32) + PRNG_C_32); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_state_next_u64(uint64_t state) +{ + + return ((state * PRNG_A_64) + PRNG_C_64); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_state_next_zu(size_t state) +{ + +#if LG_SIZEOF_PTR == 2 + return ((state * PRNG_A_32) + PRNG_C_32); +#elif LG_SIZEOF_PTR == 3 + return ((state * PRNG_A_64) + PRNG_C_64); +#else +#error Unsupported pointer size +#endif +} + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) +{ + uint32_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= 32); + + if (atomic) { + uint32_t state0; + + do { + state0 = atomic_read_u32(state); + state1 = prng_state_next_u32(state0); + } while (atomic_cas_u32(state, state0, state1)); + } else { + state1 = prng_state_next_u32(*state); + *state = state1; + } + ret = state1 >> (32 - lg_range); + + return (ret); +} + +/* 64-bit atomic operations cannot be supported on all relevant platforms. 
*/ +JEMALLOC_ALWAYS_INLINE uint64_t +prng_lg_range_u64(uint64_t *state, unsigned lg_range) +{ + uint64_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= 64); + + state1 = prng_state_next_u64(*state); + *state = state1; + ret = state1 >> (64 - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) +{ + size_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR)); + + if (atomic) { + size_t state0; + + do { + state0 = atomic_read_zu(state); + state1 = prng_state_next_zu(state0); + } while (atomic_cas_zu(state, state0, state1)); + } else { + state1 = prng_state_next_zu(*state); + *state = state1; + } + ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_range_u32(uint32_t *state, uint32_t range, bool atomic) +{ + uint32_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u32(pow2_ceil_u32(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_u32(state, lg_range, atomic); + } while (ret >= range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_range_u64(uint64_t *state, uint64_t range) +{ + uint64_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_u64(state, lg_range); + } while (ret >= range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_range_zu(size_t *state, size_t range, bool atomic) +{ + size_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_zu(state, lg_range, atomic); + } while (ret >= range); + + return (ret); +} +#endif + +#endif /* JEMALLOC_INTERNAL_PRNG_INLINES_H */ diff --git a/include/jemalloc/internal/prng_types.h b/include/jemalloc/internal/prng_types.h new file mode 100644 index 0000000..dec44c0 --- /dev/null +++ b/include/jemalloc/internal/prng_types.h @@ -0,0 +1,29 @@ +#ifndef JEMALLOC_INTERNAL_PRNG_TYPES_H +#define JEMALLOC_INTERNAL_PRNG_TYPES_H + +/* + * Simple linear congruential pseudo-random number generator: + * + * prng(y) = (a*x + c) % m + * + * where the following constants ensure maximal period: + * + * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4. + * c == Odd number (relatively prime to 2^n). + * m == 2^32 + * + * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. + * + * This choice of m has the disadvantage that the quality of the bits is + * proportional to bit position. For example, the lowest bit has a cycle of 2, + * the next has a cycle of 4, etc. For this reason, we prefer to use the upper + * bits. 
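A minimal standalone sketch of the generator described above, using the same 64-bit constants and the same take-the-high-bits trick; it mimics prng_state_next_u64()/prng_lg_range_u64() rather than calling jemalloc's internals, and the seed and range below are arbitrary:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PRNG_A_64	UINT64_C(6364136223846793005)
#define PRNG_C_64	UINT64_C(1442695040888963407)

int
main(void)
{
	uint64_t state = 42;	/* arbitrary seed for illustration */
	unsigned lg_range = 10;	/* draw values in [0, 2^10) */
	uint64_t r;
	unsigned i;

	for (i = 0; i < 5; i++) {
		/* One LCG step: state = (a * state + c) mod 2^64. */
		state = state * PRNG_A_64 + PRNG_C_64;
		/*
		 * Keep only the top lg_range bits; the low bits of an LCG
		 * with m == 2^64 have very short periods.
		 */
		r = state >> (64 - lg_range);
		assert(r < (UINT64_C(1) << lg_range));
		printf("%llu\n", (unsigned long long)r);
	}
	return (0);
}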
+ */ + +#define PRNG_A_32 UINT32_C(1103515241) +#define PRNG_C_32 UINT32_C(12347) + +#define PRNG_A_64 UINT64_C(6364136223846793005) +#define PRNG_C_64 UINT64_C(1442695040888963407) + +#endif /* JEMALLOC_INTERNAL_PRNG_TYPES_H */ diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h deleted file mode 100644 index 2d1791b..0000000 --- a/include/jemalloc/internal/prof.h +++ /dev/null @@ -1,568 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct prof_bt_s prof_bt_t; -typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_tctx_s prof_tctx_t; -typedef struct prof_gctx_s prof_gctx_t; -typedef struct prof_tdata_s prof_tdata_t; - -/* Option defaults. */ -#ifdef JEMALLOC_PROF -# define PROF_PREFIX_DEFAULT "jeprof" -#else -# define PROF_PREFIX_DEFAULT "" -#endif -#define LG_PROF_SAMPLE_DEFAULT 19 -#define LG_PROF_INTERVAL_DEFAULT -1 - -/* - * Hard limit on stack backtrace depth. The version of prof_backtrace() that - * is based on __builtin_return_address() necessarily has a hard-coded number - * of backtrace frame handlers, and should be kept in sync with this setting. - */ -#define PROF_BT_MAX 128 - -/* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 - -/* Size of memory buffer to use when writing dump files. */ -#define PROF_DUMP_BUFSIZE 65536 - -/* Size of stack-allocated buffer used by prof_printf(). */ -#define PROF_PRINTF_BUFSIZE 128 - -/* - * Number of mutexes shared among all gctx's. No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NCTX_LOCKS 1024 - -/* - * Number of mutexes shared among all tdata's. No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NTDATA_LOCKS 256 - -/* - * prof_tdata pointers close to NULL are used to encode state information that - * is used for cleaning up during thread shutdown. - */ -#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) -#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) -#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct prof_bt_s { - /* Backtrace, stored as len program counters. */ - void **vec; - unsigned len; -}; - -#ifdef JEMALLOC_PROF_LIBGCC -/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ -typedef struct { - prof_bt_t *bt; - unsigned max; -} prof_unwind_data_t; -#endif - -struct prof_cnt_s { - /* Profiling counters. */ - uint64_t curobjs; - uint64_t curbytes; - uint64_t accumobjs; - uint64_t accumbytes; -}; - -typedef enum { - prof_tctx_state_initializing, - prof_tctx_state_nominal, - prof_tctx_state_dumping, - prof_tctx_state_purgatory /* Dumper must finish destroying. */ -} prof_tctx_state_t; - -struct prof_tctx_s { - /* Thread data for thread that performed the allocation. */ - prof_tdata_t *tdata; - - /* - * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be - * defunct during teardown. - */ - uint64_t thr_uid; - uint64_t thr_discrim; - - /* Profiling counters, protected by tdata->lock. */ - prof_cnt_t cnts; - - /* Associated global context. */ - prof_gctx_t *gctx; - - /* - * UID that distinguishes multiple tctx's created by the same thread, - * but coexisting in gctx->tctxs. 
There are two ways that such - * coexistence can occur: - * - A dumper thread can cause a tctx to be retained in the purgatory - * state. - * - Although a single "producer" thread must create all tctx's which - * share the same thr_uid, multiple "consumers" can each concurrently - * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only - * gets called once each time cnts.cur{objs,bytes} drop to 0, but this - * threshold can be hit again before the first consumer finishes - * executing prof_tctx_destroy(). - */ - uint64_t tctx_uid; - - /* Linkage into gctx's tctxs. */ - rb_node(prof_tctx_t) tctx_link; - - /* - * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents - * sample vs destroy race. - */ - bool prepared; - - /* Current dump-related state, protected by gctx->lock. */ - prof_tctx_state_t state; - - /* - * Copy of cnts snapshotted during early dump phase, protected by - * dump_mtx. - */ - prof_cnt_t dump_cnts; -}; -typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; - -struct prof_gctx_s { - /* Protects nlimbo, cnt_summed, and tctxs. */ - malloc_mutex_t *lock; - - /* - * Number of threads that currently cause this gctx to be in a state of - * limbo due to one of: - * - Initializing this gctx. - * - Initializing per thread counters associated with this gctx. - * - Preparing to destroy this gctx. - * - Dumping a heap profile that includes this gctx. - * nlimbo must be 1 (single destroyer) in order to safely destroy the - * gctx. - */ - unsigned nlimbo; - - /* - * Tree of profile counters, one for each thread that has allocated in - * this context. - */ - prof_tctx_tree_t tctxs; - - /* Linkage for tree of contexts to be dumped. */ - rb_node(prof_gctx_t) dump_link; - - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* Associated backtrace. */ - prof_bt_t bt; - - /* Backtrace vector, variable size, referred to by bt. */ - void *vec[1]; -}; -typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; - -struct prof_tdata_s { - malloc_mutex_t *lock; - - /* Monotonically increasing unique thread identifier. */ - uint64_t thr_uid; - - /* - * Monotonically increasing discriminator among tdata structures - * associated with the same thr_uid. - */ - uint64_t thr_discrim; - - /* Included in heap profile dumps if non-NULL. */ - char *thread_name; - - bool attached; - bool expired; - - rb_node(prof_tdata_t) tdata_link; - - /* - * Counter used to initialize prof_tctx_t's tctx_uid. No locking is - * necessary when incrementing this field, because only one thread ever - * does so. - */ - uint64_t tctx_uid_next; - - /* - * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks - * backtraces for which it has non-zero allocation/deallocation counters - * associated with thread-specific prof_tctx_t objects. Other threads - * may write to prof_tctx_t contents when freeing associated objects. - */ - ckh_t bt2tctx; - - /* Sampling state. */ - uint64_t prng_state; - uint64_t bytes_until_sample; - - /* State used to avoid dumping while operating on prof internals. */ - bool enq; - bool enq_idump; - bool enq_gdump; - - /* - * Set to true during an early dump phase for tdata's which are - * currently being dumped. New threads' tdata's have this initialized - * to false so that they aren't accidentally included in later dump - * phases. - */ - bool dumping; - - /* - * True if profiling is active for this tdata's thread - * (thread.prof.active mallctl). - */ - bool active; - - /* Temporary storage for summation during dump. 
*/ - prof_cnt_t cnt_summed; - - /* Backtrace vector, used for calls to prof_backtrace(). */ - void *vec[PROF_BT_MAX]; -}; -typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_prof; -extern bool opt_prof_active; -extern bool opt_prof_thread_active_init; -extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_gdump; /* High-water memory dumping. */ -extern bool opt_prof_final; /* Final profile dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ -extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern char opt_prof_prefix[ - /* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PATH_MAX + -#endif - 1]; - -/* Accessed via prof_active_[gs]et{_unlocked,}(). */ -extern bool prof_active; - -/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ -extern bool prof_gdump_val; - -/* - * Profile dump interval, measured in bytes allocated. Each arena triggers a - * profile dump when it reaches this threshold. The effect is that the - * interval between profile dumps averages prof_interval, though the actual - * interval between dumps will tend to be sporadic, and the interval will be a - * maximum of approximately (prof_interval * narenas). - */ -extern uint64_t prof_interval; - -/* - * Initialized as opt_lg_prof_sample, and potentially modified during profiling - * resets. - */ -extern size_t lg_prof_sample; - -void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(tsdn_t *tsdn, extent_t *extent, - const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); -void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt); -prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); -#ifdef JEMALLOC_JET -size_t prof_tdata_count(void); -size_t prof_bt_count(void); -const prof_cnt_t *prof_cnt_all(void); -typedef int (prof_dump_open_t)(bool, const char *); -extern prof_dump_open_t *prof_dump_open; -typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); -extern prof_dump_header_t *prof_dump_header; -#endif -void prof_idump(tsdn_t *tsdn); -bool prof_mdump(tsd_t *tsd, const char *filename); -void prof_gdump(tsdn_t *tsdn); -prof_tdata_t *prof_tdata_init(tsd_t *tsd); -prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); -void prof_reset(tsd_t *tsd, size_t lg_sample); -void prof_tdata_cleanup(tsd_t *tsd); -bool prof_active_get(tsdn_t *tsdn); -bool prof_active_set(tsdn_t *tsdn, bool active); -const char *prof_thread_name_get(tsd_t *tsd); -int prof_thread_name_set(tsd_t *tsd, const char *thread_name); -bool prof_thread_active_get(tsd_t *tsd); -bool prof_thread_active_set(tsd_t *tsd, bool active); -bool prof_thread_active_init_get(tsdn_t *tsdn); -bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); -bool prof_gdump_get(tsdn_t *tsdn); -bool prof_gdump_set(tsdn_t *tsdn, bool active); -void prof_boot0(void); -void prof_boot1(void); -bool prof_boot2(tsd_t *tsd); -void prof_prefork0(tsdn_t *tsdn); -void prof_prefork1(tsdn_t *tsdn); -void prof_postfork_parent(tsdn_t *tsdn); -void prof_postfork_child(tsdn_t *tsdn); -void prof_sample_threshold_update(prof_tdata_t *tdata); - -#endif /* JEMALLOC_H_EXTERNS */ 
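To put concrete numbers on the sampling and dump-interval options above: opt_lg_prof_sample and opt_lg_prof_interval are base-2 logarithms of byte counts. A small standalone sketch of the arithmetic (LG_PROF_SAMPLE_DEFAULT is 19 as defined earlier; the interval and arena count below are hypothetical, since the interval default is -1, i.e. disabled):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	unsigned lg_prof_sample = 19;	/* LG_PROF_SAMPLE_DEFAULT */
	unsigned lg_prof_interval = 30;	/* hypothetical; default is -1 (off) */
	unsigned narenas = 4;		/* hypothetical arena count */

	/* Mean bytes between allocation samples: 2^19 = 512 KiB. */
	uint64_t sample_bytes = UINT64_C(1) << lg_prof_sample;
	/* Per-arena dump threshold: 2^30 = 1 GiB of allocation. */
	uint64_t prof_interval = UINT64_C(1) << lg_prof_interval;

	printf("mean bytes between samples: %llu\n",
	    (unsigned long long)sample_bytes);
	/*
	 * Dumps average prof_interval bytes apart, but since each arena
	 * tracks its own threshold the gap can approach
	 * prof_interval * narenas in the worst case.
	 */
	printf("approx. max bytes between dumps: %llu\n",
	    (unsigned long long)(prof_interval * narenas));
	return (0);
}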
-/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool prof_active_get_unlocked(void); -bool prof_gdump_get_unlocked(void); -prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); -prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, - const void *ptr); -void prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx); -void prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - prof_tctx_t *tctx); -bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out); -prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, - bool update); -void prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx); -void prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, - extent_t *old_extent, const void *old_ptr, size_t old_usize, - prof_tctx_t *old_tctx); -void prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, - size_t usize); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) -JEMALLOC_ALWAYS_INLINE bool -prof_active_get_unlocked(void) -{ - - /* - * Even if opt_prof is true, sampling can be temporarily disabled by - * setting prof_active to false. No locking is used when reading - * prof_active in the fast path, so there are no guarantees regarding - * how long it will take for all threads to notice state changes. - */ - return (prof_active); -} - -JEMALLOC_ALWAYS_INLINE bool -prof_gdump_get_unlocked(void) -{ - - /* - * No locking is used when reading prof_gdump_val in the fast path, so - * there are no guarantees regarding how long it will take for all - * threads to notice state changes. 
- */ - return (prof_gdump_val); -} - -JEMALLOC_ALWAYS_INLINE prof_tdata_t * -prof_tdata_get(tsd_t *tsd, bool create) -{ - prof_tdata_t *tdata; - - cassert(config_prof); - - tdata = tsd_prof_tdata_get(tsd); - if (create) { - if (unlikely(tdata == NULL)) { - if (tsd_nominal(tsd)) { - tdata = prof_tdata_init(tsd); - tsd_prof_tdata_set(tsd, tdata); - } - } else if (unlikely(tdata->expired)) { - tdata = prof_tdata_reinit(tsd, tdata); - tsd_prof_tdata_set(tsd, tdata); - } - assert(tdata == NULL || tdata->attached); - } - - return (tdata); -} - -JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) -{ - - cassert(config_prof); - assert(ptr != NULL); - - return (arena_prof_tctx_get(tsdn, extent, ptr)); -} - -JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, - prof_tctx_t *tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - - arena_prof_tctx_set(tsdn, extent, ptr, usize, tctx); -} - -JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - prof_tctx_t *tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - - arena_prof_tctx_reset(tsdn, extent, ptr, tctx); -} - -JEMALLOC_ALWAYS_INLINE bool -prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out) -{ - prof_tdata_t *tdata; - - cassert(config_prof); - - tdata = prof_tdata_get(tsd, true); - if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) - tdata = NULL; - - if (tdata_out != NULL) - *tdata_out = tdata; - - if (unlikely(tdata == NULL)) - return (true); - - if (likely(tdata->bytes_until_sample >= usize)) { - if (update) - tdata->bytes_until_sample -= usize; - return (true); - } else { - /* Compute new sample threshold. */ - if (update) - prof_sample_threshold_update(tdata); - return (!tdata->active); - } -} - -JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) -{ - prof_tctx_t *ret; - prof_tdata_t *tdata; - prof_bt_t bt; - - assert(usize == s2u(usize)); - - if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, - &tdata))) - ret = (prof_tctx_t *)(uintptr_t)1U; - else { - bt_init(&bt, tdata->vec); - prof_backtrace(&bt); - ret = prof_lookup(tsd, &bt); - } - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void -prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, - prof_tctx_t *tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - assert(usize == isalloc(tsdn, extent, ptr)); - - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_malloc_sample_object(tsdn, extent, ptr, usize, tctx); - else { - prof_tctx_set(tsdn, extent, ptr, usize, - (prof_tctx_t *)(uintptr_t)1U); - } -} - -JEMALLOC_ALWAYS_INLINE void -prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, - prof_tctx_t *tctx, bool prof_active, bool updated, extent_t *old_extent, - const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx) -{ - bool sampled, old_sampled, moved; - - cassert(config_prof); - assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); - - if (prof_active && !updated && ptr != NULL) { - assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); - if (prof_sample_accum_update(tsd, usize, true, NULL)) { - /* - * Don't sample. The usize passed to prof_alloc_prep() - * was larger than what actually got allocated, so a - * backtrace was captured for this allocation, even - * though its actual usize was insufficient to cross the - * sample threshold. 
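Aside: prof_sample_accum_update() above implements sampling as a byte countdown: usize is subtracted from tdata->bytes_until_sample on the cheap path, and only when the counter would be crossed is a backtrace captured and a fresh threshold drawn. A self-contained sketch of that decision, with a caller-supplied threshold standing in for the real prof_sample_threshold_update() draw (an assumption for brevity):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Sketch only; names are illustrative, not jemalloc identifiers. */
typedef struct {
    uint64_t bytes_until_sample;
} sample_countdown_t;

static bool
should_sample(sample_countdown_t *c, size_t usize, uint64_t next_threshold)
{
    if (c->bytes_until_sample >= usize) {
        c->bytes_until_sample -= usize; /* Common case: just count down. */
        return (false);
    }
    c->bytes_until_sample = next_threshold; /* Threshold crossed: re-arm. */
    return (true);
}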
- */ - prof_alloc_rollback(tsd, tctx, true); - tctx = (prof_tctx_t *)(uintptr_t)1U; - } - } - - /* - * The following code must differentiate among eight possible cases, - * based on three boolean conditions. - */ - sampled = ((uintptr_t)tctx > (uintptr_t)1U); - old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); - moved = (ptr != old_ptr); - - /* - * The following block must only execute if this is a non-moving - * reallocation, because for moving reallocation the old allocation will - * be deallocated via a separate call. - */ - if (unlikely(old_sampled) && !moved) - prof_free_sampled_object(tsd, old_usize, old_tctx); - - if (unlikely(sampled)) { - prof_malloc_sample_object(tsd_tsdn(tsd), extent, ptr, usize, - tctx); - } else if (moved) { - prof_tctx_set(tsd_tsdn(tsd), extent, ptr, usize, - (prof_tctx_t *)(uintptr_t)1U); - } else if (unlikely(old_sampled)) - prof_tctx_reset(tsd_tsdn(tsd), extent, ptr, tctx); -} - -JEMALLOC_ALWAYS_INLINE void -prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) -{ - prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), extent, ptr); - - cassert(config_prof); - assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); - - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_free_sampled_object(tsd, usize, tctx); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h new file mode 100644 index 0000000..3f85714 --- /dev/null +++ b/include/jemalloc/internal/prof_externs.h @@ -0,0 +1,83 @@ +#ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H +#define JEMALLOC_INTERNAL_PROF_EXTERNS_H + +extern bool opt_prof; +extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. */ +extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern char opt_prof_prefix[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; + +/* Accessed via prof_active_[gs]et{_unlocked,}(). */ +extern bool prof_active; + +/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ +extern bool prof_gdump_val; + +/* + * Profile dump interval, measured in bytes allocated. Each arena triggers a + * profile dump when it reaches this threshold. The effect is that the + * interval between profile dumps averages prof_interval, though the actual + * interval between dumps will tend to be sporadic, and the interval will be a + * maximum of approximately (prof_interval * narenas). + */ +extern uint64_t prof_interval; + +/* + * Initialized as opt_lg_prof_sample, and potentially modified during profiling + * resets. 
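Aside: throughout prof_malloc()/prof_realloc()/prof_free() above, a prof_tctx_t pointer doubles as a flag: (prof_tctx_t *)(uintptr_t)1U marks an unsampled allocation, and any larger value is a real per-thread context. Hedged helpers restating the casts used for sampled/old_sampled (these are not jemalloc functions):

#include <stdbool.h>
#include <stdint.h>

#define TCTX_SENTINEL_UNSAMPLED ((uintptr_t)1U)

static bool
tctx_is_sampled(const void *tctx)
{
    /* NULL and the 1U placeholder both mean "no backtrace was recorded". */
    return ((uintptr_t)tctx > TCTX_SENTINEL_UNSAMPLED);
}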
+ */ +extern size_t lg_prof_sample; + +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); +void prof_malloc_sample_object(tsdn_t *tsdn, extent_t *extent, + const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(prof_bt_t *bt); +prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); +#ifdef JEMALLOC_JET +size_t prof_tdata_count(void); +size_t prof_bt_count(void); +const prof_cnt_t *prof_cnt_all(void); +typedef int (prof_dump_open_t)(bool, const char *); +extern prof_dump_open_t *prof_dump_open; +typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); +extern prof_dump_header_t *prof_dump_header; +#endif +void prof_idump(tsdn_t *tsdn); +bool prof_mdump(tsd_t *tsd, const char *filename); +void prof_gdump(tsdn_t *tsdn); +prof_tdata_t *prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); +void prof_reset(tsd_t *tsd, size_t lg_sample); +void prof_tdata_cleanup(tsd_t *tsd); +bool prof_active_get(tsdn_t *tsdn); +bool prof_active_set(tsdn_t *tsdn, bool active); +const char *prof_thread_name_get(tsd_t *tsd); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_thread_active_get(tsd_t *tsd); +bool prof_thread_active_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsdn_t *tsdn); +bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); +bool prof_gdump_get(tsdn_t *tsdn); +bool prof_gdump_set(tsdn_t *tsdn, bool active); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(tsd_t *tsd); +void prof_prefork0(tsdn_t *tsdn); +void prof_prefork1(tsdn_t *tsdn); +void prof_postfork_parent(tsdn_t *tsdn); +void prof_postfork_child(tsdn_t *tsdn); +void prof_sample_threshold_update(prof_tdata_t *tdata); + +#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h new file mode 100644 index 0000000..0b58042 --- /dev/null +++ b/include/jemalloc/internal/prof_inlines.h @@ -0,0 +1,242 @@ +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_H +#define JEMALLOC_INTERNAL_PROF_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +bool prof_active_get_unlocked(void); +bool prof_gdump_get_unlocked(void); +prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); +prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, + const void *ptr); +void prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx); +void prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, + prof_tctx_t *tctx); +bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, + prof_tdata_t **tdata_out); +prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, + bool update); +void prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx); +void prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, + extent_t *old_extent, const void *old_ptr, size_t old_usize, + prof_tctx_t *old_tctx); +void prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, + size_t usize); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +JEMALLOC_ALWAYS_INLINE bool +prof_active_get_unlocked(void) +{ + + /* + * Even if opt_prof is true, sampling can be temporarily disabled by + * setting prof_active to false. 
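Aside: taken together, the externs and inlines declared here are meant to be used in a decide-allocate-commit order: the caller asks prof_alloc_prep() for a tctx before allocating, then either reports the result with prof_malloc() or rolls back. A schematic fragment showing that ordering, with the allocation itself and the tsd/extent plumbing elided since they live outside these headers (this is the intended call order, not the literal jemalloc fast path):

/* Sketch: decide, allocate, then commit or roll back the profiling state. */
prof_tctx_t *tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
/* ... allocate usize bytes, producing ptr and its extent ... */
if (ptr == NULL)
    prof_alloc_rollback(tsd, tctx, true);
else
    prof_malloc(tsd_tsdn(tsd), extent, ptr, usize, tctx);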
No locking is used when reading + * prof_active in the fast path, so there are no guarantees regarding + * how long it will take for all threads to notice state changes. + */ + return (prof_active); +} + +JEMALLOC_ALWAYS_INLINE bool +prof_gdump_get_unlocked(void) +{ + + /* + * No locking is used when reading prof_gdump_val in the fast path, so + * there are no guarantees regarding how long it will take for all + * threads to notice state changes. + */ + return (prof_gdump_val); +} + +JEMALLOC_ALWAYS_INLINE prof_tdata_t * +prof_tdata_get(tsd_t *tsd, bool create) +{ + prof_tdata_t *tdata; + + cassert(config_prof); + + tdata = tsd_prof_tdata_get(tsd); + if (create) { + if (unlikely(tdata == NULL)) { + if (tsd_nominal(tsd)) { + tdata = prof_tdata_init(tsd); + tsd_prof_tdata_set(tsd, tdata); + } + } else if (unlikely(tdata->expired)) { + tdata = prof_tdata_reinit(tsd, tdata); + tsd_prof_tdata_set(tsd, tdata); + } + assert(tdata == NULL || tdata->attached); + } + + return (tdata); +} + +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) +{ + + cassert(config_prof); + assert(ptr != NULL); + + return (arena_prof_tctx_get(tsdn, extent, ptr)); +} + +JEMALLOC_ALWAYS_INLINE void +prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, + prof_tctx_t *tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_tctx_set(tsdn, extent, ptr, usize, tctx); +} + +JEMALLOC_ALWAYS_INLINE void +prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, + prof_tctx_t *tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_tctx_reset(tsdn, extent, ptr, tctx); +} + +JEMALLOC_ALWAYS_INLINE bool +prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, + prof_tdata_t **tdata_out) +{ + prof_tdata_t *tdata; + + cassert(config_prof); + + tdata = prof_tdata_get(tsd, true); + if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) + tdata = NULL; + + if (tdata_out != NULL) + *tdata_out = tdata; + + if (unlikely(tdata == NULL)) + return (true); + + if (likely(tdata->bytes_until_sample >= usize)) { + if (update) + tdata->bytes_until_sample -= usize; + return (true); + } else { + /* Compute new sample threshold. 
*/ + if (update) + prof_sample_threshold_update(tdata); + return (!tdata->active); + } +} + +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) +{ + prof_tctx_t *ret; + prof_tdata_t *tdata; + prof_bt_t bt; + + assert(usize == s2u(usize)); + + if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, + &tdata))) + ret = (prof_tctx_t *)(uintptr_t)1U; + else { + bt_init(&bt, tdata->vec); + prof_backtrace(&bt); + ret = prof_lookup(tsd, &bt); + } + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void +prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, + prof_tctx_t *tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + assert(usize == isalloc(tsdn, extent, ptr)); + + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + prof_malloc_sample_object(tsdn, extent, ptr, usize, tctx); + else { + prof_tctx_set(tsdn, extent, ptr, usize, + (prof_tctx_t *)(uintptr_t)1U); + } +} + +JEMALLOC_ALWAYS_INLINE void +prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, + prof_tctx_t *tctx, bool prof_active, bool updated, extent_t *old_extent, + const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx) +{ + bool sampled, old_sampled, moved; + + cassert(config_prof); + assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); + + if (prof_active && !updated && ptr != NULL) { + assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); + if (prof_sample_accum_update(tsd, usize, true, NULL)) { + /* + * Don't sample. The usize passed to prof_alloc_prep() + * was larger than what actually got allocated, so a + * backtrace was captured for this allocation, even + * though its actual usize was insufficient to cross the + * sample threshold. + */ + prof_alloc_rollback(tsd, tctx, true); + tctx = (prof_tctx_t *)(uintptr_t)1U; + } + } + + /* + * The following code must differentiate among eight possible cases, + * based on three boolean conditions. + */ + sampled = ((uintptr_t)tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); + moved = (ptr != old_ptr); + + /* + * The following block must only execute if this is a non-moving + * reallocation, because for moving reallocation the old allocation will + * be deallocated via a separate call. + */ + if (unlikely(old_sampled) && !moved) + prof_free_sampled_object(tsd, old_usize, old_tctx); + + if (unlikely(sampled)) { + prof_malloc_sample_object(tsd_tsdn(tsd), extent, ptr, usize, + tctx); + } else if (moved) { + prof_tctx_set(tsd_tsdn(tsd), extent, ptr, usize, + (prof_tctx_t *)(uintptr_t)1U); + } else if (unlikely(old_sampled)) + prof_tctx_reset(tsd_tsdn(tsd), extent, ptr, tctx); +} + +JEMALLOC_ALWAYS_INLINE void +prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) +{ + prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), extent, ptr); + + cassert(config_prof); + assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); + + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + prof_free_sampled_object(tsd, usize, tctx); +} +#endif + +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_H */ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h new file mode 100644 index 0000000..caae125 --- /dev/null +++ b/include/jemalloc/internal/prof_structs.h @@ -0,0 +1,187 @@ +#ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H +#define JEMALLOC_INTERNAL_PROF_STRUCTS_H + +struct prof_bt_s { + /* Backtrace, stored as len program counters. 
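Aside: prof_bt_t is just a (vec, len) view over caller-provided storage; bt_init() points it at a buffer and prof_backtrace() fills in up to PROF_BT_MAX program counters. A hedged usage fragment, assuming only the declarations above:

/* Fragment: capture the current stack into caller-provided storage. */
void *vec[PROF_BT_MAX];
prof_bt_t bt;

bt_init(&bt, vec);
prof_backtrace(&bt); /* On return, bt.vec/bt.len describe the recorded frames. */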
*/ + void **vec; + unsigned len; +}; + +#ifdef JEMALLOC_PROF_LIBGCC +/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ +typedef struct { + prof_bt_t *bt; + unsigned max; +} prof_unwind_data_t; +#endif + +struct prof_cnt_s { + /* Profiling counters. */ + uint64_t curobjs; + uint64_t curbytes; + uint64_t accumobjs; + uint64_t accumbytes; +}; + +typedef enum { + prof_tctx_state_initializing, + prof_tctx_state_nominal, + prof_tctx_state_dumping, + prof_tctx_state_purgatory /* Dumper must finish destroying. */ +} prof_tctx_state_t; + +struct prof_tctx_s { + /* Thread data for thread that performed the allocation. */ + prof_tdata_t *tdata; + + /* + * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be + * defunct during teardown. + */ + uint64_t thr_uid; + uint64_t thr_discrim; + + /* Profiling counters, protected by tdata->lock. */ + prof_cnt_t cnts; + + /* Associated global context. */ + prof_gctx_t *gctx; + + /* + * UID that distinguishes multiple tctx's created by the same thread, + * but coexisting in gctx->tctxs. There are two ways that such + * coexistence can occur: + * - A dumper thread can cause a tctx to be retained in the purgatory + * state. + * - Although a single "producer" thread must create all tctx's which + * share the same thr_uid, multiple "consumers" can each concurrently + * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only + * gets called once each time cnts.cur{objs,bytes} drop to 0, but this + * threshold can be hit again before the first consumer finishes + * executing prof_tctx_destroy(). + */ + uint64_t tctx_uid; + + /* Linkage into gctx's tctxs. */ + rb_node(prof_tctx_t) tctx_link; + + /* + * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents + * sample vs destroy race. + */ + bool prepared; + + /* Current dump-related state, protected by gctx->lock. */ + prof_tctx_state_t state; + + /* + * Copy of cnts snapshotted during early dump phase, protected by + * dump_mtx. + */ + prof_cnt_t dump_cnts; +}; +typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; + +struct prof_gctx_s { + /* Protects nlimbo, cnt_summed, and tctxs. */ + malloc_mutex_t *lock; + + /* + * Number of threads that currently cause this gctx to be in a state of + * limbo due to one of: + * - Initializing this gctx. + * - Initializing per thread counters associated with this gctx. + * - Preparing to destroy this gctx. + * - Dumping a heap profile that includes this gctx. + * nlimbo must be 1 (single destroyer) in order to safely destroy the + * gctx. + */ + unsigned nlimbo; + + /* + * Tree of profile counters, one for each thread that has allocated in + * this context. + */ + prof_tctx_tree_t tctxs; + + /* Linkage for tree of contexts to be dumped. */ + rb_node(prof_gctx_t) dump_link; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Associated backtrace. */ + prof_bt_t bt; + + /* Backtrace vector, variable size, referred to by bt. */ + void *vec[1]; +}; +typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; + +struct prof_tdata_s { + malloc_mutex_t *lock; + + /* Monotonically increasing unique thread identifier. */ + uint64_t thr_uid; + + /* + * Monotonically increasing discriminator among tdata structures + * associated with the same thr_uid. + */ + uint64_t thr_discrim; + + /* Included in heap profile dumps if non-NULL. */ + char *thread_name; + + bool attached; + bool expired; + + rb_node(prof_tdata_t) tdata_link; + + /* + * Counter used to initialize prof_tctx_t's tctx_uid. 
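Aside: prof_gctx_s above ends with a one-element vec[] that is really a variable-length tail; the allocation is oversized so that bt.vec can point into storage that lives and dies with the gctx. A standalone sketch of that pre-C99 trailing-array idiom (illustrative names, not jemalloc code):

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    unsigned len;
    void *vec[1]; /* Trailing storage; really len elements. */
} bt_owner_sketch_t;

static bt_owner_sketch_t *
bt_owner_new(void *const *frames, unsigned len)
{
    /* Size from offsetof so vec[0] isn't counted twice. */
    bt_owner_sketch_t *o = malloc(offsetof(bt_owner_sketch_t, vec) +
        (size_t)len * sizeof(void *));

    if (o == NULL)
        return (NULL);
    o->len = len;
    memcpy(o->vec, frames, (size_t)len * sizeof(void *));
    return (o);
}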
No locking is + * necessary when incrementing this field, because only one thread ever + * does so. + */ + uint64_t tctx_uid_next; + + /* + * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks + * backtraces for which it has non-zero allocation/deallocation counters + * associated with thread-specific prof_tctx_t objects. Other threads + * may write to prof_tctx_t contents when freeing associated objects. + */ + ckh_t bt2tctx; + + /* Sampling state. */ + uint64_t prng_state; + uint64_t bytes_until_sample; + + /* State used to avoid dumping while operating on prof internals. */ + bool enq; + bool enq_idump; + bool enq_gdump; + + /* + * Set to true during an early dump phase for tdata's which are + * currently being dumped. New threads' tdata's have this initialized + * to false so that they aren't accidentally included in later dump + * phases. + */ + bool dumping; + + /* + * True if profiling is active for this tdata's thread + * (thread.prof.active mallctl). + */ + bool active; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void *vec[PROF_BT_MAX]; +}; +typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; + +#endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */ diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h new file mode 100644 index 0000000..e1eb7fb --- /dev/null +++ b/include/jemalloc/internal/prof_types.h @@ -0,0 +1,55 @@ +#ifndef JEMALLOC_INTERNAL_PROF_TYPES_H +#define JEMALLOC_INTERNAL_PROF_TYPES_H + +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_gctx_s prof_gctx_t; +typedef struct prof_tdata_s prof_tdata_t; + +/* Option defaults. */ +#ifdef JEMALLOC_PROF +# define PROF_PREFIX_DEFAULT "jeprof" +#else +# define PROF_PREFIX_DEFAULT "" +#endif +#define LG_PROF_SAMPLE_DEFAULT 19 +#define LG_PROF_INTERVAL_DEFAULT -1 + +/* + * Hard limit on stack backtrace depth. The version of prof_backtrace() that + * is based on __builtin_return_address() necessarily has a hard-coded number + * of backtrace frame handlers, and should be kept in sync with this setting. + */ +#define PROF_BT_MAX 128 + +/* Initial hash table size. */ +#define PROF_CKH_MINITEMS 64 + +/* Size of memory buffer to use when writing dump files. */ +#define PROF_DUMP_BUFSIZE 65536 + +/* Size of stack-allocated buffer used by prof_printf(). */ +#define PROF_PRINTF_BUFSIZE 128 + +/* + * Number of mutexes shared among all gctx's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NCTX_LOCKS 1024 + +/* + * Number of mutexes shared among all tdata's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NTDATA_LOCKS 256 + +/* + * prof_tdata pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) +#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) +#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY + +#endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index 1834bb8..424485c 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -1,3 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_QL_H +#define JEMALLOC_INTERNAL_QL_H + /* List definitions. 
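Aside: PROF_TDATA_STATE_REINCARNATED and PROF_TDATA_STATE_PURGATORY (defined in prof_types.h above) overload near-NULL pointer values so a single tsd slot can carry either a real prof_tdata_t * or a small cleanup-state code. The range check used by prof_sample_accum_update() distills to the following hedged restatement (not a jemalloc function):

#include <stdbool.h>
#include <stdint.h>

/* 0 (NULL), 1 (REINCARNATED) and 2 (PURGATORY) are state codes; anything
 * larger is a usable prof_tdata_t pointer. */
static bool
tdata_ptr_is_real(const void *tdata)
{
    return ((uintptr_t)tdata > (uintptr_t)2U); /* 2U == PROF_TDATA_STATE_MAX */
}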
*/ #define ql_head(a_type) \ struct { \ @@ -79,3 +82,5 @@ struct { \ #define ql_reverse_foreach(a_var, a_head, a_field) \ qr_reverse_foreach((a_var), ql_first(a_head), a_field) + +#endif /* JEMALLOC_INTERNAL_QL_H */ diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index 3b5d027..06dfdaf 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -1,3 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_QR_H +#define JEMALLOC_INTERNAL_QR_H + /* Ring definitions. */ #define qr(a_type) \ struct { \ @@ -67,3 +70,5 @@ struct { \ (var) != NULL; \ (var) = (((var) != (a_qr)) \ ? (var)->a_field.qre_prev : NULL)) + +#endif /* JEMALLOC_INTERNAL_QR_H */ diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h deleted file mode 100644 index b2a2800..0000000 --- a/include/jemalloc/internal/rtree.h +++ /dev/null @@ -1,608 +0,0 @@ -/* - * This radix tree implementation is tailored to the singular purpose of - * associating metadata with extents that are currently owned by jemalloc. - * - ******************************************************************************* - */ -#ifdef JEMALLOC_H_TYPES - -typedef struct rtree_elm_s rtree_elm_t; -typedef struct rtree_elm_witness_s rtree_elm_witness_t; -typedef struct rtree_elm_witness_tsd_s rtree_elm_witness_tsd_t; -typedef struct rtree_level_s rtree_level_t; -typedef struct rtree_ctx_s rtree_ctx_t; -typedef struct rtree_s rtree_t; - -/* - * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the - * machine address width. - */ -#define LG_RTREE_BITS_PER_LEVEL 4 -#define RTREE_BITS_PER_LEVEL (1U << LG_RTREE_BITS_PER_LEVEL) -/* Maximum rtree height. */ -#define RTREE_HEIGHT_MAX \ - ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) - -#define RTREE_CTX_INITIALIZER { \ - false, \ - 0, \ - 0, \ - {NULL /* C initializes all trailing elements to NULL. */} \ -} - -/* - * Maximum number of concurrently acquired elements per thread. This controls - * how many witness_t structures are embedded in tsd. Ideally rtree_elm_t would - * have a witness_t directly embedded, but that would dramatically bloat the - * tree. This must contain enough entries to e.g. coalesce two extents. - */ -#define RTREE_ELM_ACQUIRE_MAX 4 - -/* Initializers for rtree_elm_witness_tsd_t. */ -#define RTREE_ELM_WITNESS_INITIALIZER { \ - NULL, \ - WITNESS_INITIALIZER("rtree_elm", WITNESS_RANK_RTREE_ELM) \ -} - -#define RTREE_ELM_WITNESS_TSD_INITIALIZER { \ - { \ - RTREE_ELM_WITNESS_INITIALIZER, \ - RTREE_ELM_WITNESS_INITIALIZER, \ - RTREE_ELM_WITNESS_INITIALIZER, \ - RTREE_ELM_WITNESS_INITIALIZER \ - } \ -} - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct rtree_elm_s { - union { - void *pun; - rtree_elm_t *child; - extent_t *extent; - }; -}; - -struct rtree_elm_witness_s { - const rtree_elm_t *elm; - witness_t witness; -}; - -struct rtree_elm_witness_tsd_s { - rtree_elm_witness_t witnesses[RTREE_ELM_ACQUIRE_MAX]; -}; - -struct rtree_level_s { - /* - * A non-NULL subtree points to a subtree rooted along the hypothetical - * path to the leaf node corresponding to key 0. Depending on what keys - * have been used to store to the tree, an arbitrary combination of - * subtree pointers may remain NULL. - * - * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4. 
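Aside: for the rtree constants just deleted (and re-added in rtree_types.h further below), the height follows directly from the pointer width: RTREE_BITS_PER_LEVEL is 1 << 4 = 16, so a 64-bit build (LG_SIZEOF_PTR == 3) gets RTREE_HEIGHT_MAX = 64 / 16 = 4 levels, a 32-bit build gets 2, and the 48-bit-key case discussed in the comment needs 48 / 16 = 3. A worked restatement with illustrative macro names:

/* 64-bit example: (1U << (3 + 3)) / (1U << 4) == 64 / 16 == 4 levels. */
#define SKETCH_LG_SIZEOF_PTR        3
#define SKETCH_RTREE_BITS_PER_LEVEL (1U << 4)
#define SKETCH_RTREE_HEIGHT_MAX \
    ((1U << (SKETCH_LG_SIZEOF_PTR + 3)) / SKETCH_RTREE_BITS_PER_LEVEL)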
- * This results in a 3-level tree, and the leftmost leaf can be directly - * accessed via levels[2], the subtree prefixed by 0x0000 (excluding - * 0x00000000) can be accessed via levels[1], and the remainder of the - * tree can be accessed via levels[0]. - * - * levels[0] : [ | 0x0001******** | 0x0002******** | ...] - * - * levels[1] : [ | 0x00000001**** | 0x00000002**** | ... ] - * - * levels[2] : [extent(0x000000000000) | extent(0x000000000001) | ...] - * - * This has practical implications on x64, which currently uses only the - * lower 47 bits of virtual address space in userland, thus leaving - * levels[0] unused and avoiding a level of tree traversal. - */ - union { - void *subtree_pun; - rtree_elm_t *subtree; - }; - /* Number of key bits distinguished by this level. */ - unsigned bits; - /* - * Cumulative number of key bits distinguished by traversing to - * corresponding tree level. - */ - unsigned cumbits; -}; - -struct rtree_ctx_s { - /* If false, key/elms have not yet been initialized by a lookup. */ - bool valid; - /* Key that corresponds to the tree path recorded in elms. */ - uintptr_t key; - /* Memoized rtree_start_level(key). */ - unsigned start_level; - /* - * A path through rtree, driven by key. Only elements that could - * actually be used for subsequent lookups are initialized, i.e. if - * start_level = rtree_start_level(key) is non-zero, the first - * start_level elements are uninitialized. The last element contains a - * pointer to the leaf node element that corresponds to key, so that - * exact matches require no tree node offset computation. - */ - rtree_elm_t *elms[RTREE_HEIGHT_MAX + 1]; -}; - -struct rtree_s { - unsigned height; - /* - * Precomputed table used to convert from the number of leading 0 key - * bits to which subtree level to start at. 
- */ - unsigned start_level[RTREE_HEIGHT_MAX + 1]; - rtree_level_t levels[RTREE_HEIGHT_MAX]; - malloc_mutex_t init_lock; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -bool rtree_new(rtree_t *rtree, unsigned bits); -#ifdef JEMALLOC_JET -typedef rtree_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); -extern rtree_node_alloc_t *rtree_node_alloc; -typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_elm_t *); -extern rtree_node_dalloc_t *rtree_node_dalloc; -void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); -#endif -rtree_elm_t *rtree_subtree_read_hard(tsdn_t *tsdn, rtree_t *rtree, - unsigned level); -rtree_elm_t *rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, - rtree_elm_t *elm, unsigned level); -void rtree_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, - uintptr_t key, const rtree_elm_t *elm); -void rtree_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, - const rtree_elm_t *elm); -void rtree_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, - const rtree_elm_t *elm); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -unsigned rtree_start_level(const rtree_t *rtree, uintptr_t key); -unsigned rtree_ctx_start_level(const rtree_t *rtree, - const rtree_ctx_t *rtree_ctx, uintptr_t key); -uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); - -bool rtree_node_valid(rtree_elm_t *node); -rtree_elm_t *rtree_child_tryread(rtree_elm_t *elm, bool dependent); -rtree_elm_t *rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, - unsigned level, bool dependent); -extent_t *rtree_elm_read(rtree_elm_t *elm, bool dependent); -void rtree_elm_write(rtree_elm_t *elm, const extent_t *extent); -rtree_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level, - bool dependent); -rtree_elm_t *rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, - unsigned level, bool dependent); -rtree_elm_t *rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); - -bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, const extent_t *extent); -extent_t *rtree_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent); -rtree_elm_t *rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); -extent_t *rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, - rtree_elm_t *elm); -void rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, - rtree_elm_t *elm, const extent_t *extent); -void rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm); -void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) -JEMALLOC_ALWAYS_INLINE unsigned -rtree_start_level(const rtree_t *rtree, uintptr_t key) -{ - unsigned start_level; - - if (unlikely(key == 0)) - return (rtree->height - 1); - - start_level = rtree->start_level[(lg_floor(key) + 1) >> - LG_RTREE_BITS_PER_LEVEL]; - assert(start_level < rtree->height); - return (start_level); -} - -JEMALLOC_ALWAYS_INLINE unsigned -rtree_ctx_start_level(const rtree_t *rtree, const rtree_ctx_t *rtree_ctx, - uintptr_t key) -{ - unsigned start_level; - uintptr_t key_diff; - - /* 
Compute the difference between old and new lookup keys. */ - key_diff = key ^ rtree_ctx->key; - assert(key_diff != 0); /* Handled in rtree_elm_lookup(). */ - - /* - * Compute the last traversal path element at which the keys' paths - * are the same. - */ - start_level = rtree->start_level[(lg_floor(key_diff) + 1) >> - LG_RTREE_BITS_PER_LEVEL]; - assert(start_level < rtree->height); - return (start_level); -} - -JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) -{ - - return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - - rtree->levels[level].cumbits)) & ((ZU(1) << - rtree->levels[level].bits) - 1)); -} - -JEMALLOC_ALWAYS_INLINE bool -rtree_node_valid(rtree_elm_t *node) -{ - - return ((uintptr_t)node != (uintptr_t)0); -} - -JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_child_tryread(rtree_elm_t *elm, bool dependent) -{ - rtree_elm_t *child; - - /* Double-checked read (first read may be stale). */ - child = elm->child; - if (!dependent && !rtree_node_valid(child)) - child = (rtree_elm_t *)atomic_read_p(&elm->pun); - assert(!dependent || child != NULL); - return (child); -} - -JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level, - bool dependent) -{ - rtree_elm_t *child; - - child = rtree_child_tryread(elm, dependent); - if (!dependent && unlikely(!rtree_node_valid(child))) - child = rtree_child_read_hard(tsdn, rtree, elm, level); - assert(!dependent || child != NULL); - return (child); -} - -JEMALLOC_ALWAYS_INLINE extent_t * -rtree_elm_read(rtree_elm_t *elm, bool dependent) -{ - extent_t *extent; - - if (dependent) { - /* - * Reading a value on behalf of a pointer to a valid allocation - * is guaranteed to be a clean read even without - * synchronization, because the rtree update became visible in - * memory before the pointer came into existence. - */ - extent = elm->extent; - } else { - /* - * An arbitrary read, e.g. on behalf of ivsalloc(), may not be - * dependent on a previous rtree write, which means a stale read - * could result if synchronization were omitted here. - */ - extent = (extent_t *)atomic_read_p(&elm->pun); - } - - /* Mask the lock bit. */ - extent = (extent_t *)((uintptr_t)extent & ~((uintptr_t)0x1)); - - return (extent); -} - -JEMALLOC_INLINE void -rtree_elm_write(rtree_elm_t *elm, const extent_t *extent) -{ - - atomic_write_p(&elm->pun, extent); -} - -JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) -{ - rtree_elm_t *subtree; - - /* Double-checked read (first read may be stale). 
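Aside: rtree_elm_read() above strips the low bit before returning because that bit is borrowed as a lock flag by rtree_elm_acquire(); extent_t pointers are aligned, so bit 0 is always spare, and a reader racing with an acquirer still sees a correctly aligned pointer. Hedged helpers spelling out the tagging (not jemalloc functions):

#include <stdbool.h>
#include <stdint.h>

static inline bool
elm_ptr_locked(const void *p)
{
    return (((uintptr_t)p & (uintptr_t)0x1) != 0);
}

static inline void *
elm_ptr_untag(const void *p)
{
    return ((void *)((uintptr_t)p & ~(uintptr_t)0x1)); /* Mask the lock bit. */
}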
*/ - subtree = rtree->levels[level].subtree; - if (!dependent && unlikely(!rtree_node_valid(subtree))) { - subtree = (rtree_elm_t *)atomic_read_p( - &rtree->levels[level].subtree_pun); - } - assert(!dependent || subtree != NULL); - return (subtree); -} - -JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level, bool dependent) -{ - rtree_elm_t *subtree; - - subtree = rtree_subtree_tryread(rtree, level, dependent); - if (!dependent && unlikely(!rtree_node_valid(subtree))) - subtree = rtree_subtree_read_hard(tsdn, rtree, level); - assert(!dependent || subtree != NULL); - return (subtree); -} - -JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, bool init_missing) -{ - uintptr_t subkey; - unsigned start_level; - rtree_elm_t *node; - - assert(!dependent || !init_missing); - - if (dependent || init_missing) { - if (likely(rtree_ctx->valid)) { - if (key == rtree_ctx->key) - return (rtree_ctx->elms[rtree->height]); - else { - unsigned no_ctx_start_level = - rtree_start_level(rtree, key); - unsigned ctx_start_level; - - if (likely(no_ctx_start_level <= - rtree_ctx->start_level && (ctx_start_level = - rtree_ctx_start_level(rtree, rtree_ctx, - key)) >= rtree_ctx->start_level)) { - start_level = ctx_start_level; - node = rtree_ctx->elms[ctx_start_level]; - } else { - start_level = no_ctx_start_level; - node = init_missing ? - rtree_subtree_read(tsdn, rtree, - no_ctx_start_level, dependent) : - rtree_subtree_tryread(rtree, - no_ctx_start_level, dependent); - rtree_ctx->start_level = - no_ctx_start_level; - rtree_ctx->elms[no_ctx_start_level] = - node; - } - } - } else { - unsigned no_ctx_start_level = rtree_start_level(rtree, - key); - - start_level = no_ctx_start_level; - node = init_missing ? rtree_subtree_read(tsdn, rtree, - no_ctx_start_level, dependent) : - rtree_subtree_tryread(rtree, no_ctx_start_level, - dependent); - rtree_ctx->valid = true; - rtree_ctx->start_level = no_ctx_start_level; - rtree_ctx->elms[no_ctx_start_level] = node; - } - rtree_ctx->key = key; - } else { - start_level = rtree_start_level(rtree, key); - node = init_missing ? rtree_subtree_read(tsdn, rtree, - start_level, dependent) : rtree_subtree_tryread(rtree, - start_level, dependent); - } - -#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) - switch (start_level + RTREE_GET_BIAS) { -#define RTREE_GET_SUBTREE(level) \ - case level: \ - assert(level < (RTREE_HEIGHT_MAX-1)); \ - if (!dependent && unlikely(!rtree_node_valid(node))) { \ - if (init_missing) \ - rtree_ctx->valid = false; \ - return (NULL); \ - } \ - subkey = rtree_subkey(rtree, key, level - \ - RTREE_GET_BIAS); \ - node = init_missing ? rtree_child_read(tsdn, rtree, \ - &node[subkey], level - RTREE_GET_BIAS, dependent) : \ - rtree_child_tryread(&node[subkey], dependent); \ - if (dependent || init_missing) { \ - rtree_ctx->elms[level - RTREE_GET_BIAS + 1] = \ - node; \ - } \ - /* Fall through. */ -#define RTREE_GET_LEAF(level) \ - case level: \ - assert(level == (RTREE_HEIGHT_MAX-1)); \ - if (!dependent && unlikely(!rtree_node_valid(node))) { \ - if (init_missing) \ - rtree_ctx->valid = false; \ - return (NULL); \ - } \ - subkey = rtree_subkey(rtree, key, level - \ - RTREE_GET_BIAS); \ - /* \ - * node is a leaf, so it contains values rather than \ - * child pointers. 
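Aside: rtree_elm_lookup() above first tries to reuse the path cached in rtree_ctx; the level where a new key diverges from the cached key follows from the highest bit set in key ^ rtree_ctx->key, so lookups for nearby addresses restart deep in the tree rather than at the root. A hedged worked case:

/*
 * Suppose the cached key and the new key differ only in their low 16 bits
 * (one leaf's worth under RTREE_BITS_PER_LEVEL == 16). Then
 * lg_floor(key ^ rtree_ctx->key) < 16, rtree_ctx_start_level() maps that
 * difference to the deepest cached level, and every shallower element
 * recorded in rtree_ctx->elms is reused without re-reading the tree.
 */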
\ - */ \ - node = &node[subkey]; \ - if (dependent || init_missing) { \ - rtree_ctx->elms[level - RTREE_GET_BIAS + 1] = \ - node; \ - } \ - return (node); -#if RTREE_HEIGHT_MAX > 1 - RTREE_GET_SUBTREE(0) -#endif -#if RTREE_HEIGHT_MAX > 2 - RTREE_GET_SUBTREE(1) -#endif -#if RTREE_HEIGHT_MAX > 3 - RTREE_GET_SUBTREE(2) -#endif -#if RTREE_HEIGHT_MAX > 4 - RTREE_GET_SUBTREE(3) -#endif -#if RTREE_HEIGHT_MAX > 5 - RTREE_GET_SUBTREE(4) -#endif -#if RTREE_HEIGHT_MAX > 6 - RTREE_GET_SUBTREE(5) -#endif -#if RTREE_HEIGHT_MAX > 7 - RTREE_GET_SUBTREE(6) -#endif -#if RTREE_HEIGHT_MAX > 8 - RTREE_GET_SUBTREE(7) -#endif -#if RTREE_HEIGHT_MAX > 9 - RTREE_GET_SUBTREE(8) -#endif -#if RTREE_HEIGHT_MAX > 10 - RTREE_GET_SUBTREE(9) -#endif -#if RTREE_HEIGHT_MAX > 11 - RTREE_GET_SUBTREE(10) -#endif -#if RTREE_HEIGHT_MAX > 12 - RTREE_GET_SUBTREE(11) -#endif -#if RTREE_HEIGHT_MAX > 13 - RTREE_GET_SUBTREE(12) -#endif -#if RTREE_HEIGHT_MAX > 14 - RTREE_GET_SUBTREE(13) -#endif -#if RTREE_HEIGHT_MAX > 15 - RTREE_GET_SUBTREE(14) -#endif -#if RTREE_HEIGHT_MAX > 16 -# error Unsupported RTREE_HEIGHT_MAX -#endif - RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1) -#undef RTREE_GET_SUBTREE -#undef RTREE_GET_LEAF - default: not_reached(); - } -#undef RTREE_GET_BIAS - not_reached(); -} - -JEMALLOC_INLINE bool -rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, - const extent_t *extent) -{ - rtree_elm_t *elm; - - assert(extent != NULL); /* Use rtree_clear() for this case. */ - assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); - - elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, false, true); - if (elm == NULL) - return (true); - assert(rtree_elm_read(elm, false) == NULL); - rtree_elm_write(elm, extent); - - return (false); -} - -JEMALLOC_ALWAYS_INLINE extent_t * -rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, - bool dependent) -{ - rtree_elm_t *elm; - - elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, false); - if (elm == NULL) - return (NULL); - - return (rtree_elm_read(elm, dependent)); -} - -JEMALLOC_INLINE rtree_elm_t * -rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, bool init_missing) -{ - rtree_elm_t *elm; - - elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, - init_missing); - if (!dependent && elm == NULL) - return (NULL); - { - extent_t *extent; - void *s; - - do { - extent = rtree_elm_read(elm, false); - /* The least significant bit serves as a lock. 
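Aside: rtree_elm_acquire() above spins with atomic_cas_p() until it wins the low bit; after that the *_acquired accessors read and write the element with other writers excluded, and rtree_elm_release() publishes the final value with the bit cleared (rtree_clear() below is exactly this pattern with NULL). A schematic fragment that swaps the extent recorded for an existing key, with tsdn/rtree/rtree_ctx/new_extent assumed from context:

/* Fragment, not jemalloc source: update a mapping under the element lock. */
rtree_elm_t *elm = rtree_elm_acquire(tsdn, rtree, rtree_ctx, key, true, false);
extent_t *old_extent = rtree_elm_read_acquired(tsdn, rtree, elm);
/* ... compute new_extent, e.g. a coalesced replacement for old_extent ... */
rtree_elm_write_acquired(tsdn, rtree, elm, new_extent);
rtree_elm_release(tsdn, rtree, elm);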
*/ - s = (void *)((uintptr_t)extent | (uintptr_t)0x1); - } while (atomic_cas_p(&elm->pun, (void *)extent, s)); - } - - if (config_debug) - rtree_elm_witness_acquire(tsdn, rtree, key, elm); - - return (elm); -} - -JEMALLOC_INLINE extent_t * -rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) -{ - extent_t *extent; - - assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1); - extent = (extent_t *)((uintptr_t)elm->pun & ~((uintptr_t)0x1)); - assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); - - if (config_debug) - rtree_elm_witness_access(tsdn, rtree, elm); - - return (extent); -} - -JEMALLOC_INLINE void -rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm, - const extent_t *extent) -{ - - assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); - assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1); - - if (config_debug) - rtree_elm_witness_access(tsdn, rtree, elm); - - elm->pun = (void *)((uintptr_t)extent | (uintptr_t)0x1); - assert(rtree_elm_read_acquired(tsdn, rtree, elm) == extent); -} - -JEMALLOC_INLINE void -rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) -{ - - rtree_elm_write(elm, rtree_elm_read_acquired(tsdn, rtree, elm)); - if (config_debug) - rtree_elm_witness_release(tsdn, rtree, elm); -} - -JEMALLOC_INLINE void -rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) -{ - rtree_elm_t *elm; - - elm = rtree_elm_acquire(tsdn, rtree, rtree_ctx, key, true, false); - rtree_elm_write_acquired(tsdn, rtree, elm, NULL); - rtree_elm_release(tsdn, rtree, elm); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h new file mode 100644 index 0000000..db8e8b1 --- /dev/null +++ b/include/jemalloc/internal/rtree_externs.h @@ -0,0 +1,23 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_EXTERNS_H +#define JEMALLOC_INTERNAL_RTREE_EXTERNS_H + +bool rtree_new(rtree_t *rtree, unsigned bits); +#ifdef JEMALLOC_JET +typedef rtree_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); +extern rtree_node_alloc_t *rtree_node_alloc; +typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_elm_t *); +extern rtree_node_dalloc_t *rtree_node_dalloc; +void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); +#endif +rtree_elm_t *rtree_subtree_read_hard(tsdn_t *tsdn, rtree_t *rtree, + unsigned level); +rtree_elm_t *rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, + rtree_elm_t *elm, unsigned level); +void rtree_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, + uintptr_t key, const rtree_elm_t *elm); +void rtree_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, + const rtree_elm_t *elm); +void rtree_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, + const rtree_elm_t *elm); + +#endif /* JEMALLOC_INTERNAL_RTREE_EXTERNS_H */ diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h new file mode 100644 index 0000000..7efba54 --- /dev/null +++ b/include/jemalloc/internal/rtree_inlines.h @@ -0,0 +1,442 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_INLINES_H +#define JEMALLOC_INTERNAL_RTREE_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +unsigned rtree_start_level(const rtree_t *rtree, uintptr_t key); +unsigned rtree_ctx_start_level(const rtree_t *rtree, + const rtree_ctx_t *rtree_ctx, uintptr_t key); +uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned 
level); + +bool rtree_node_valid(rtree_elm_t *node); +rtree_elm_t *rtree_child_tryread(rtree_elm_t *elm, bool dependent); +rtree_elm_t *rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, + unsigned level, bool dependent); +extent_t *rtree_elm_read(rtree_elm_t *elm, bool dependent); +void rtree_elm_write(rtree_elm_t *elm, const extent_t *extent); +rtree_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level, + bool dependent); +rtree_elm_t *rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, + unsigned level, bool dependent); +rtree_elm_t *rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); + +bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, const extent_t *extent); +extent_t *rtree_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent); +rtree_elm_t *rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); +extent_t *rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, + rtree_elm_t *elm); +void rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, + rtree_elm_t *elm, const extent_t *extent); +void rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm); +void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) +JEMALLOC_ALWAYS_INLINE unsigned +rtree_start_level(const rtree_t *rtree, uintptr_t key) +{ + unsigned start_level; + + if (unlikely(key == 0)) + return (rtree->height - 1); + + start_level = rtree->start_level[(lg_floor(key) + 1) >> + LG_RTREE_BITS_PER_LEVEL]; + assert(start_level < rtree->height); + return (start_level); +} + +JEMALLOC_ALWAYS_INLINE unsigned +rtree_ctx_start_level(const rtree_t *rtree, const rtree_ctx_t *rtree_ctx, + uintptr_t key) +{ + unsigned start_level; + uintptr_t key_diff; + + /* Compute the difference between old and new lookup keys. */ + key_diff = key ^ rtree_ctx->key; + assert(key_diff != 0); /* Handled in rtree_elm_lookup(). */ + + /* + * Compute the last traversal path element at which the keys' paths + * are the same. + */ + start_level = rtree->start_level[(lg_floor(key_diff) + 1) >> + LG_RTREE_BITS_PER_LEVEL]; + assert(start_level < rtree->height); + return (start_level); +} + +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) +{ + + return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - + rtree->levels[level].cumbits)) & ((ZU(1) << + rtree->levels[level].bits) - 1)); +} + +JEMALLOC_ALWAYS_INLINE bool +rtree_node_valid(rtree_elm_t *node) +{ + + return ((uintptr_t)node != (uintptr_t)0); +} + +JEMALLOC_ALWAYS_INLINE rtree_elm_t * +rtree_child_tryread(rtree_elm_t *elm, bool dependent) +{ + rtree_elm_t *child; + + /* Double-checked read (first read may be stale). 
*/ + child = elm->child; + if (!dependent && !rtree_node_valid(child)) + child = (rtree_elm_t *)atomic_read_p(&elm->pun); + assert(!dependent || child != NULL); + return (child); +} + +JEMALLOC_ALWAYS_INLINE rtree_elm_t * +rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level, + bool dependent) +{ + rtree_elm_t *child; + + child = rtree_child_tryread(elm, dependent); + if (!dependent && unlikely(!rtree_node_valid(child))) + child = rtree_child_read_hard(tsdn, rtree, elm, level); + assert(!dependent || child != NULL); + return (child); +} + +JEMALLOC_ALWAYS_INLINE extent_t * +rtree_elm_read(rtree_elm_t *elm, bool dependent) +{ + extent_t *extent; + + if (dependent) { + /* + * Reading a value on behalf of a pointer to a valid allocation + * is guaranteed to be a clean read even without + * synchronization, because the rtree update became visible in + * memory before the pointer came into existence. + */ + extent = elm->extent; + } else { + /* + * An arbitrary read, e.g. on behalf of ivsalloc(), may not be + * dependent on a previous rtree write, which means a stale read + * could result if synchronization were omitted here. + */ + extent = (extent_t *)atomic_read_p(&elm->pun); + } + + /* Mask the lock bit. */ + extent = (extent_t *)((uintptr_t)extent & ~((uintptr_t)0x1)); + + return (extent); +} + +JEMALLOC_INLINE void +rtree_elm_write(rtree_elm_t *elm, const extent_t *extent) +{ + + atomic_write_p(&elm->pun, extent); +} + +JEMALLOC_ALWAYS_INLINE rtree_elm_t * +rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) +{ + rtree_elm_t *subtree; + + /* Double-checked read (first read may be stale). */ + subtree = rtree->levels[level].subtree; + if (!dependent && unlikely(!rtree_node_valid(subtree))) { + subtree = (rtree_elm_t *)atomic_read_p( + &rtree->levels[level].subtree_pun); + } + assert(!dependent || subtree != NULL); + return (subtree); +} + +JEMALLOC_ALWAYS_INLINE rtree_elm_t * +rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level, bool dependent) +{ + rtree_elm_t *subtree; + + subtree = rtree_subtree_tryread(rtree, level, dependent); + if (!dependent && unlikely(!rtree_node_valid(subtree))) + subtree = rtree_subtree_read_hard(tsdn, rtree, level); + assert(!dependent || subtree != NULL); + return (subtree); +} + +JEMALLOC_ALWAYS_INLINE rtree_elm_t * +rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, bool init_missing) +{ + uintptr_t subkey; + unsigned start_level; + rtree_elm_t *node; + + assert(!dependent || !init_missing); + + if (dependent || init_missing) { + if (likely(rtree_ctx->valid)) { + if (key == rtree_ctx->key) + return (rtree_ctx->elms[rtree->height]); + else { + unsigned no_ctx_start_level = + rtree_start_level(rtree, key); + unsigned ctx_start_level; + + if (likely(no_ctx_start_level <= + rtree_ctx->start_level && (ctx_start_level = + rtree_ctx_start_level(rtree, rtree_ctx, + key)) >= rtree_ctx->start_level)) { + start_level = ctx_start_level; + node = rtree_ctx->elms[ctx_start_level]; + } else { + start_level = no_ctx_start_level; + node = init_missing ? + rtree_subtree_read(tsdn, rtree, + no_ctx_start_level, dependent) : + rtree_subtree_tryread(rtree, + no_ctx_start_level, dependent); + rtree_ctx->start_level = + no_ctx_start_level; + rtree_ctx->elms[no_ctx_start_level] = + node; + } + } + } else { + unsigned no_ctx_start_level = rtree_start_level(rtree, + key); + + start_level = no_ctx_start_level; + node = init_missing ? 
rtree_subtree_read(tsdn, rtree, + no_ctx_start_level, dependent) : + rtree_subtree_tryread(rtree, no_ctx_start_level, + dependent); + rtree_ctx->valid = true; + rtree_ctx->start_level = no_ctx_start_level; + rtree_ctx->elms[no_ctx_start_level] = node; + } + rtree_ctx->key = key; + } else { + start_level = rtree_start_level(rtree, key); + node = init_missing ? rtree_subtree_read(tsdn, rtree, + start_level, dependent) : rtree_subtree_tryread(rtree, + start_level, dependent); + } + +#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) + switch (start_level + RTREE_GET_BIAS) { +#define RTREE_GET_SUBTREE(level) \ + case level: \ + assert(level < (RTREE_HEIGHT_MAX-1)); \ + if (!dependent && unlikely(!rtree_node_valid(node))) { \ + if (init_missing) \ + rtree_ctx->valid = false; \ + return (NULL); \ + } \ + subkey = rtree_subkey(rtree, key, level - \ + RTREE_GET_BIAS); \ + node = init_missing ? rtree_child_read(tsdn, rtree, \ + &node[subkey], level - RTREE_GET_BIAS, dependent) : \ + rtree_child_tryread(&node[subkey], dependent); \ + if (dependent || init_missing) { \ + rtree_ctx->elms[level - RTREE_GET_BIAS + 1] = \ + node; \ + } \ + /* Fall through. */ +#define RTREE_GET_LEAF(level) \ + case level: \ + assert(level == (RTREE_HEIGHT_MAX-1)); \ + if (!dependent && unlikely(!rtree_node_valid(node))) { \ + if (init_missing) \ + rtree_ctx->valid = false; \ + return (NULL); \ + } \ + subkey = rtree_subkey(rtree, key, level - \ + RTREE_GET_BIAS); \ + /* \ + * node is a leaf, so it contains values rather than \ + * child pointers. \ + */ \ + node = &node[subkey]; \ + if (dependent || init_missing) { \ + rtree_ctx->elms[level - RTREE_GET_BIAS + 1] = \ + node; \ + } \ + return (node); +#if RTREE_HEIGHT_MAX > 1 + RTREE_GET_SUBTREE(0) +#endif +#if RTREE_HEIGHT_MAX > 2 + RTREE_GET_SUBTREE(1) +#endif +#if RTREE_HEIGHT_MAX > 3 + RTREE_GET_SUBTREE(2) +#endif +#if RTREE_HEIGHT_MAX > 4 + RTREE_GET_SUBTREE(3) +#endif +#if RTREE_HEIGHT_MAX > 5 + RTREE_GET_SUBTREE(4) +#endif +#if RTREE_HEIGHT_MAX > 6 + RTREE_GET_SUBTREE(5) +#endif +#if RTREE_HEIGHT_MAX > 7 + RTREE_GET_SUBTREE(6) +#endif +#if RTREE_HEIGHT_MAX > 8 + RTREE_GET_SUBTREE(7) +#endif +#if RTREE_HEIGHT_MAX > 9 + RTREE_GET_SUBTREE(8) +#endif +#if RTREE_HEIGHT_MAX > 10 + RTREE_GET_SUBTREE(9) +#endif +#if RTREE_HEIGHT_MAX > 11 + RTREE_GET_SUBTREE(10) +#endif +#if RTREE_HEIGHT_MAX > 12 + RTREE_GET_SUBTREE(11) +#endif +#if RTREE_HEIGHT_MAX > 13 + RTREE_GET_SUBTREE(12) +#endif +#if RTREE_HEIGHT_MAX > 14 + RTREE_GET_SUBTREE(13) +#endif +#if RTREE_HEIGHT_MAX > 15 + RTREE_GET_SUBTREE(14) +#endif +#if RTREE_HEIGHT_MAX > 16 +# error Unsupported RTREE_HEIGHT_MAX +#endif + RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1) +#undef RTREE_GET_SUBTREE +#undef RTREE_GET_LEAF + default: not_reached(); + } +#undef RTREE_GET_BIAS + not_reached(); +} + +JEMALLOC_INLINE bool +rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, + const extent_t *extent) +{ + rtree_elm_t *elm; + + assert(extent != NULL); /* Use rtree_clear() for this case. 
*/ + assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); + + elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, false, true); + if (elm == NULL) + return (true); + assert(rtree_elm_read(elm, false) == NULL); + rtree_elm_write(elm, extent); + + return (false); +} + +JEMALLOC_ALWAYS_INLINE extent_t * +rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, + bool dependent) +{ + rtree_elm_t *elm; + + elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, false); + if (elm == NULL) + return (NULL); + + return (rtree_elm_read(elm, dependent)); +} + +JEMALLOC_INLINE rtree_elm_t * +rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, bool init_missing) +{ + rtree_elm_t *elm; + + elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, + init_missing); + if (!dependent && elm == NULL) + return (NULL); + { + extent_t *extent; + void *s; + + do { + extent = rtree_elm_read(elm, false); + /* The least significant bit serves as a lock. */ + s = (void *)((uintptr_t)extent | (uintptr_t)0x1); + } while (atomic_cas_p(&elm->pun, (void *)extent, s)); + } + + if (config_debug) + rtree_elm_witness_acquire(tsdn, rtree, key, elm); + + return (elm); +} + +JEMALLOC_INLINE extent_t * +rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) +{ + extent_t *extent; + + assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1); + extent = (extent_t *)((uintptr_t)elm->pun & ~((uintptr_t)0x1)); + assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); + + if (config_debug) + rtree_elm_witness_access(tsdn, rtree, elm); + + return (extent); +} + +JEMALLOC_INLINE void +rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm, + const extent_t *extent) +{ + + assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); + assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1); + + if (config_debug) + rtree_elm_witness_access(tsdn, rtree, elm); + + elm->pun = (void *)((uintptr_t)extent | (uintptr_t)0x1); + assert(rtree_elm_read_acquired(tsdn, rtree, elm) == extent); +} + +JEMALLOC_INLINE void +rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) +{ + + rtree_elm_write(elm, rtree_elm_read_acquired(tsdn, rtree, elm)); + if (config_debug) + rtree_elm_witness_release(tsdn, rtree, elm); +} + +JEMALLOC_INLINE void +rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) +{ + rtree_elm_t *elm; + + elm = rtree_elm_acquire(tsdn, rtree, rtree_ctx, key, true, false); + rtree_elm_write_acquired(tsdn, rtree, elm, NULL); + rtree_elm_release(tsdn, rtree, elm); +} +#endif + +#endif /* JEMALLOC_INTERNAL_RTREE_INLINES_H */ diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h new file mode 100644 index 0000000..5a7a23c --- /dev/null +++ b/include/jemalloc/internal/rtree_structs.h @@ -0,0 +1,86 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_STRUCTS_H +#define JEMALLOC_INTERNAL_RTREE_STRUCTS_H + +struct rtree_elm_s { + union { + void *pun; + rtree_elm_t *child; + extent_t *extent; + }; +}; + +struct rtree_elm_witness_s { + const rtree_elm_t *elm; + witness_t witness; +}; + +struct rtree_elm_witness_tsd_s { + rtree_elm_witness_t witnesses[RTREE_ELM_ACQUIRE_MAX]; +}; + +struct rtree_level_s { + /* + * A non-NULL subtree points to a subtree rooted along the hypothetical + * path to the leaf node corresponding to key 0. 
Depending on what keys + * have been used to store to the tree, an arbitrary combination of + * subtree pointers may remain NULL. + * + * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4. + * This results in a 3-level tree, and the leftmost leaf can be directly + * accessed via levels[2], the subtree prefixed by 0x0000 (excluding + * 0x00000000) can be accessed via levels[1], and the remainder of the + * tree can be accessed via levels[0]. + * + * levels[0] : [<unused> | 0x0001******** | 0x0002******** | ...] + * + * levels[1] : [<unused> | 0x00000001**** | 0x00000002**** | ... ] + * + * levels[2] : [extent(0x000000000000) | extent(0x000000000001) | ...] + * + * This has practical implications on x64, which currently uses only the + * lower 47 bits of virtual address space in userland, thus leaving + * levels[0] unused and avoiding a level of tree traversal. + */ + union { + void *subtree_pun; + rtree_elm_t *subtree; + }; + /* Number of key bits distinguished by this level. */ + unsigned bits; + /* + * Cumulative number of key bits distinguished by traversing to + * corresponding tree level. + */ + unsigned cumbits; +}; + +struct rtree_ctx_s { + /* If false, key/elms have not yet been initialized by a lookup. */ + bool valid; + /* Key that corresponds to the tree path recorded in elms. */ + uintptr_t key; + /* Memoized rtree_start_level(key). */ + unsigned start_level; + /* + * A path through rtree, driven by key. Only elements that could + * actually be used for subsequent lookups are initialized, i.e. if + * start_level = rtree_start_level(key) is non-zero, the first + * start_level elements are uninitialized. The last element contains a + * pointer to the leaf node element that corresponds to key, so that + * exact matches require no tree node offset computation. + */ + rtree_elm_t *elms[RTREE_HEIGHT_MAX + 1]; +}; + +struct rtree_s { + unsigned height; + /* + * Precomputed table used to convert from the number of leading 0 key + * bits to which subtree level to start at. + */ + unsigned start_level[RTREE_HEIGHT_MAX + 1]; + rtree_level_t levels[RTREE_HEIGHT_MAX]; + malloc_mutex_t init_lock; +}; + +#endif /* JEMALLOC_INTERNAL_RTREE_STRUCTS_H */ diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h new file mode 100644 index 0000000..c02ab7a --- /dev/null +++ b/include/jemalloc/internal/rtree_types.h @@ -0,0 +1,58 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_TYPES_H +#define JEMALLOC_INTERNAL_RTREE_TYPES_H + +/* + * This radix tree implementation is tailored to the singular purpose of + * associating metadata with extents that are currently owned by jemalloc. + * + ******************************************************************************* + */ + +typedef struct rtree_elm_s rtree_elm_t; +typedef struct rtree_elm_witness_s rtree_elm_witness_t; +typedef struct rtree_elm_witness_tsd_s rtree_elm_witness_tsd_t; +typedef struct rtree_level_s rtree_level_t; +typedef struct rtree_ctx_s rtree_ctx_t; +typedef struct rtree_s rtree_t; + +/* + * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the + * machine address width. + */ +#define LG_RTREE_BITS_PER_LEVEL 4 +#define RTREE_BITS_PER_LEVEL (1U << LG_RTREE_BITS_PER_LEVEL) +/* Maximum rtree height. */ +#define RTREE_HEIGHT_MAX \ + ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) + +#define RTREE_CTX_INITIALIZER { \ + false, \ + 0, \ + 0, \ + {NULL /* C initializes all trailing elements to NULL. */} \ +} + +/* + * Maximum number of concurrently acquired elements per thread.
This controls + * how many witness_t structures are embedded in tsd. Ideally rtree_elm_t would + * have a witness_t directly embedded, but that would dramatically bloat the + * tree. This must contain enough entries to e.g. coalesce two extents. + */ +#define RTREE_ELM_ACQUIRE_MAX 4 + +/* Initializers for rtree_elm_witness_tsd_t. */ +#define RTREE_ELM_WITNESS_INITIALIZER { \ + NULL, \ + WITNESS_INITIALIZER("rtree_elm", WITNESS_RANK_RTREE_ELM) \ +} + +#define RTREE_ELM_WITNESS_TSD_INITIALIZER { \ + { \ + RTREE_ELM_WITNESS_INITIALIZER, \ + RTREE_ELM_WITNESS_INITIALIZER, \ + RTREE_ELM_WITNESS_INITIALIZER, \ + RTREE_ELM_WITNESS_INITIALIZER \ + } \ +} + +#endif /* JEMALLOC_INTERNAL_RTREE_TYPES_H */ diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 5a57f87..3680b65 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -261,9 +261,10 @@ size_classes() { } cat <iteration = 0; -} - -JEMALLOC_INLINE void -spin_adaptive(spin_t *spin) -{ - volatile uint64_t i; - - for (i = 0; i < (KQU(1) << spin->iteration); i++) - CPU_SPINWAIT; - - if (spin->iteration < 63) - spin->iteration++; -} - -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/include/jemalloc/internal/spin_inlines.h b/include/jemalloc/internal/spin_inlines.h new file mode 100644 index 0000000..b10f67e --- /dev/null +++ b/include/jemalloc/internal/spin_inlines.h @@ -0,0 +1,31 @@ +#ifndef JEMALLOC_INTERNAL_SPIN_INLINES_H +#define JEMALLOC_INTERNAL_SPIN_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +void spin_init(spin_t *spin); +void spin_adaptive(spin_t *spin); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_SPIN_C_)) +JEMALLOC_INLINE void +spin_init(spin_t *spin) +{ + + spin->iteration = 0; +} + +JEMALLOC_INLINE void +spin_adaptive(spin_t *spin) +{ + volatile uint64_t i; + + for (i = 0; i < (KQU(1) << spin->iteration); i++) + CPU_SPINWAIT; + + if (spin->iteration < 63) + spin->iteration++; +} + +#endif + +#endif /* JEMALLOC_INTERNAL_SPIN_INLINES_H */ diff --git a/include/jemalloc/internal/spin_structs.h b/include/jemalloc/internal/spin_structs.h new file mode 100644 index 0000000..ef71a76 --- /dev/null +++ b/include/jemalloc/internal/spin_structs.h @@ -0,0 +1,8 @@ +#ifndef JEMALLOC_INTERNAL_SPIN_STRUCTS_H +#define JEMALLOC_INTERNAL_SPIN_STRUCTS_H + +struct spin_s { + unsigned iteration; +}; + +#endif /* JEMALLOC_INTERNAL_SPIN_STRUCTS_H */ diff --git a/include/jemalloc/internal/spin_types.h b/include/jemalloc/internal/spin_types.h new file mode 100644 index 0000000..52ee4cc --- /dev/null +++ b/include/jemalloc/internal/spin_types.h @@ -0,0 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_SPIN_TYPES_H +#define JEMALLOC_INTERNAL_SPIN_TYPES_H + +typedef struct spin_s spin_t; + +#endif /* JEMALLOC_INTERNAL_SPIN_TYPES_H */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h deleted file mode 100644 index bea4e3e..0000000 --- a/include/jemalloc/internal/stats.h +++ /dev/null @@ -1,130 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct tcache_bin_stats_s tcache_bin_stats_t; -typedef struct malloc_bin_stats_s malloc_bin_stats_t; -typedef struct malloc_large_stats_s malloc_large_stats_t; -typedef struct arena_stats_s arena_stats_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ 
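For context on the spin_*() inlines moved into spin_inlines.h above: spin_adaptive() implements simple exponential backoff, issuing 1, 2, 4, ... CPU_SPINWAIT pauses on successive calls until the iteration counter saturates at 63. A minimal caller sketch, illustrative only and not part of this patch (try_lock() here is a hypothetical predicate, not a jemalloc API), might look like:

	/* Sketch: adaptive spinning around a hypothetical try_lock(). */
	static void
	lock_with_backoff(void)
	{
		spin_t spin;

		spin_init(&spin);		/* First wait is a single pause. */
		while (!try_lock())		/* Hypothetical acquisition attempt. */
			spin_adaptive(&spin);	/* 1, 2, 4, ... CPU_SPINWAIT pauses. */
	}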
-#ifdef JEMALLOC_H_STRUCTS - -struct tcache_bin_stats_s { - /* - * Number of allocation requests that corresponded to the size of this - * bin. - */ - uint64_t nrequests; -}; - -struct malloc_bin_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the bin. Note that tcache may allocate an object, then recycle it - * many times, resulting many increments to nrequests, but only one - * each to nmalloc and ndalloc. - */ - uint64_t nmalloc; - uint64_t ndalloc; - - /* - * Number of allocation requests that correspond to the size of this - * bin. This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - uint64_t nrequests; - - /* - * Current number of regions of this size class, including regions - * currently cached by tcache. - */ - size_t curregs; - - /* Number of tcache fills from this bin. */ - uint64_t nfills; - - /* Number of tcache flushes to this bin. */ - uint64_t nflushes; - - /* Total number of slabs created for this bin's size class. */ - uint64_t nslabs; - - /* - * Total number of slabs reused by extracting them from the slabs heap - * for this bin's size class. - */ - uint64_t reslabs; - - /* Current number of slabs in this bin. */ - size_t curslabs; -}; - -struct malloc_large_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the arena. - */ - uint64_t nmalloc; - uint64_t ndalloc; - - /* - * Number of allocation requests that correspond to this size class. - * This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - uint64_t nrequests; - - /* Current number of allocations of this size class. */ - size_t curlextents; -}; - -struct arena_stats_s { - /* Number of bytes currently mapped. */ - size_t mapped; - - /* - * Number of bytes currently retained as a side effect of munmap() being - * disabled/bypassed. Retained bytes are technically mapped (though - * always decommitted or purged), but they are excluded from the mapped - * statistic (above). - */ - size_t retained; - - /* - * Total number of purge sweeps, total number of madvise calls made, - * and total pages purged in order to keep dirty unused memory under - * control. - */ - uint64_t npurge; - uint64_t nmadvise; - uint64_t purged; - - size_t base; - size_t internal; /* Protected via atomic_*_zu(). */ - size_t resident; - - size_t allocated_large; - uint64_t nmalloc_large; - uint64_t ndalloc_large; - uint64_t nrequests_large; - - /* One element for each large size class. 
*/ - malloc_large_stats_t lstats[NSIZES - NBINS]; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_stats_print; - -void stats_print(void (*write)(void *, const char *), void *cbopaque, - const char *opts); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/stats_externs.h b/include/jemalloc/internal/stats_externs.h new file mode 100644 index 0000000..a0a1ab6 --- /dev/null +++ b/include/jemalloc/internal/stats_externs.h @@ -0,0 +1,9 @@ +#ifndef JEMALLOC_INTERNAL_STATS_EXTERNS_H +#define JEMALLOC_INTERNAL_STATS_EXTERNS_H + +extern bool opt_stats_print; + +void stats_print(void (*write)(void *, const char *), void *cbopaque, + const char *opts); + +#endif /* JEMALLOC_INTERNAL_STATS_EXTERNS_H */ diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h new file mode 100644 index 0000000..aaa0bf4 --- /dev/null +++ b/include/jemalloc/internal/stats_structs.h @@ -0,0 +1,107 @@ +#ifndef JEMALLOC_INTERNAL_STATS_STRUCTS_H +#define JEMALLOC_INTERNAL_STATS_STRUCTS_H + +struct tcache_bin_stats_s { + /* + * Number of allocation requests that corresponded to the size of this + * bin. + */ + uint64_t nrequests; +}; + +struct malloc_bin_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the bin. Note that tcache may allocate an object, then recycle it + * many times, resulting in many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to the size of this + * bin. This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + + /* + * Current number of regions of this size class, including regions + * currently cached by tcache. + */ + size_t curregs; + + /* Number of tcache fills from this bin. */ + uint64_t nfills; + + /* Number of tcache flushes to this bin. */ + uint64_t nflushes; + + /* Total number of slabs created for this bin's size class. */ + uint64_t nslabs; + + /* + * Total number of slabs reused by extracting them from the slabs heap + * for this bin's size class. + */ + uint64_t reslabs; + + /* Current number of slabs in this bin. */ + size_t curslabs; +}; + +struct malloc_large_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to this size class. + * This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + + /* Current number of allocations of this size class. */ + size_t curlextents; +}; + +struct arena_stats_s { + /* Number of bytes currently mapped. */ + size_t mapped; + + /* + * Number of bytes currently retained as a side effect of munmap() being + * disabled/bypassed. Retained bytes are technically mapped (though + * always decommitted or purged), but they are excluded from the mapped + * statistic (above).
+ */ + size_t retained; + + /* + * Total number of purge sweeps, total number of madvise calls made, + * and total pages purged in order to keep dirty unused memory under + * control. + */ + uint64_t npurge; + uint64_t nmadvise; + uint64_t purged; + + size_t base; + size_t internal; /* Protected via atomic_*_zu(). */ + size_t resident; + + size_t allocated_large; + uint64_t nmalloc_large; + uint64_t ndalloc_large; + uint64_t nrequests_large; + + /* One element for each large size class. */ + malloc_large_stats_t lstats[NSIZES - NBINS]; +}; + +#endif /* JEMALLOC_INTERNAL_STATS_STRUCTS_H */ diff --git a/include/jemalloc/internal/stats_types.h b/include/jemalloc/internal/stats_types.h new file mode 100644 index 0000000..f202b23 --- /dev/null +++ b/include/jemalloc/internal/stats_types.h @@ -0,0 +1,9 @@ +#ifndef JEMALLOC_INTERNAL_STATS_TYPES_H +#define JEMALLOC_INTERNAL_STATS_TYPES_H + +typedef struct tcache_bin_stats_s tcache_bin_stats_t; +typedef struct malloc_bin_stats_s malloc_bin_stats_t; +typedef struct malloc_large_stats_s malloc_large_stats_t; +typedef struct arena_stats_s arena_stats_t; + +#endif /* JEMALLOC_INTERNAL_STATS_TYPES_H */ diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h deleted file mode 100644 index 25a1ad0..0000000 --- a/include/jemalloc/internal/tcache.h +++ /dev/null @@ -1,459 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct tcache_bin_info_s tcache_bin_info_t; -typedef struct tcache_bin_s tcache_bin_t; -typedef struct tcache_s tcache_t; -typedef struct tcaches_s tcaches_t; - -/* - * tcache pointers close to NULL are used to encode state information that is - * used for two purposes: preventing thread caching on a per thread basis and - * cleaning up during thread shutdown. - */ -#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) -#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) -#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) -#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY - -/* - * Absolute minimum number of cache slots for each small bin. - */ -#define TCACHE_NSLOTS_SMALL_MIN 20 - -/* - * Absolute maximum number of cache slots for each small bin in the thread - * cache. This is an additional constraint beyond that imposed as: twice the - * number of regions per slab for this size class. - * - * This constant must be an even number. - */ -#define TCACHE_NSLOTS_SMALL_MAX 200 - -/* Number of cache slots for large size classes. */ -#define TCACHE_NSLOTS_LARGE 20 - -/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ -#define LG_TCACHE_MAXCLASS_DEFAULT 15 - -/* - * TCACHE_GC_SWEEP is the approximate number of allocation events between - * full GC sweeps. Integer rounding may cause the actual number to be - * slightly higher, since GC is performed incrementally. - */ -#define TCACHE_GC_SWEEP 8192 - -/* Number of tcache allocation/deallocation events between incremental GCs. */ -#define TCACHE_GC_INCR \ - ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -typedef enum { - tcache_enabled_false = 0, /* Enable cast to/from bool. */ - tcache_enabled_true = 1, - tcache_enabled_default = 2 -} tcache_enabled_t; - -/* - * Read-only information associated with each element of tcache_t's tbins array - * is stored separately, mainly to reduce memory usage. 
- */ -struct tcache_bin_info_s { - unsigned ncached_max; /* Upper limit on ncached. */ -}; - -struct tcache_bin_s { - tcache_bin_stats_t tstats; - int low_water; /* Min # cached since last GC. */ - unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ - unsigned ncached; /* # of cached objects. */ - /* - * To make use of adjacent cacheline prefetch, the items in the avail - * stack goes to higher address for newer allocations. avail points - * just above the available space, which means that - * avail[-ncached, ... -1] are available items and the lowest item will - * be allocated first. - */ - void **avail; /* Stack of available objects. */ -}; - -struct tcache_s { - ql_elm(tcache_t) link; /* Used for aggregating stats. */ - uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - ticker_t gc_ticker; /* Drives incremental GC. */ - szind_t next_gc_bin; /* Next bin to GC. */ - tcache_bin_t tbins[1]; /* Dynamically sized. */ - /* - * The pointer stacks associated with tbins follow as a contiguous - * array. During tcache initialization, the avail pointer in each - * element of tbins is initialized to point to the proper offset within - * this array. - */ -}; - -/* Linkage for list of available (previously used) explicit tcache IDs. */ -struct tcaches_s { - union { - tcache_t *tcache; - tcaches_t *next; - }; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_tcache; -extern ssize_t opt_lg_tcache_max; - -extern tcache_bin_info_t *tcache_bin_info; - -/* - * Number of tcache bins. There are NBINS small-object bins, plus 0 or more - * large-object bins. - */ -extern unsigned nhbins; - -/* Maximum cached size class. */ -extern size_t tcache_maxclass; - -/* - * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and - * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are - * completely disjoint from this data structure. tcaches starts off as a sparse - * array, so it has no physical memory footprint until individual pages are - * touched. This allows the entire array to be allocated the first time an - * explicit tcache is created without a disproportionate impact on memory usage. 
- */ -extern tcaches_t *tcaches; - -size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); -void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success); -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, - unsigned rem, tcache_t *tcache); -void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, - arena_t *oldarena, arena_t *newarena); -tcache_t *tcache_get_hard(tsd_t *tsd); -tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); -void tcache_cleanup(tsd_t *tsd); -void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsd_t *tsd, unsigned *r_ind); -void tcaches_flush(tsd_t *tsd, unsigned ind); -void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void tcache_event(tsd_t *tsd, tcache_t *tcache); -void tcache_flush(void); -bool tcache_enabled_get(void); -tcache_t *tcache_get(tsd_t *tsd, bool create); -void tcache_enabled_set(bool enabled); -void *tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success); -void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, szind_t ind, bool zero, bool slow_path); -void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, szind_t ind, bool zero, bool slow_path); -void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, - szind_t binind, bool slow_path); -void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, - size_t size, bool slow_path); -tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) -JEMALLOC_INLINE void -tcache_flush(void) -{ - tsd_t *tsd; - - cassert(config_tcache); - - tsd = tsd_fetch(); - tcache_cleanup(tsd); -} - -JEMALLOC_INLINE bool -tcache_enabled_get(void) -{ - tsd_t *tsd; - tcache_enabled_t tcache_enabled; - - cassert(config_tcache); - - tsd = tsd_fetch(); - tcache_enabled = tsd_tcache_enabled_get(tsd); - if (tcache_enabled == tcache_enabled_default) { - tcache_enabled = (tcache_enabled_t)opt_tcache; - tsd_tcache_enabled_set(tsd, tcache_enabled); - } - - return ((bool)tcache_enabled); -} - -JEMALLOC_INLINE void -tcache_enabled_set(bool enabled) -{ - tsd_t *tsd; - tcache_enabled_t tcache_enabled; - - cassert(config_tcache); - - tsd = tsd_fetch(); - - tcache_enabled = (tcache_enabled_t)enabled; - tsd_tcache_enabled_set(tsd, tcache_enabled); - - if (!enabled) - tcache_cleanup(tsd); -} - -JEMALLOC_ALWAYS_INLINE tcache_t * -tcache_get(tsd_t *tsd, bool create) -{ - tcache_t *tcache; - - if (!config_tcache) - return (NULL); - - tcache = tsd_tcache_get(tsd); - if (!create) - return (tcache); - if (unlikely(tcache == NULL) && tsd_nominal(tsd)) { - tcache = tcache_get_hard(tsd); - tsd_tcache_set(tsd, tcache); - } - - return (tcache); -} - -JEMALLOC_ALWAYS_INLINE void -tcache_event(tsd_t *tsd, tcache_t *tcache) -{ - - if (TCACHE_GC_INCR == 0) - return; - - if (unlikely(ticker_tick(&tcache->gc_ticker))) - tcache_event_hard(tsd, tcache); -} - -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) -{ - void *ret; - - if (unlikely(tbin->ncached == 0)) 
{ - tbin->low_water = -1; - *tcache_success = false; - return (NULL); - } - /* - * tcache_success (instead of ret) should be checked upon the return of - * this function. We avoid checking (ret == NULL) because there is - * never a null stored on the avail stack (which is unknown to the - * compiler), and eagerly checking ret would cause pipeline stall - * (waiting for the cacheline). - */ - *tcache_success = true; - ret = *(tbin->avail - tbin->ncached); - tbin->ncached--; - - if (unlikely((int)tbin->ncached < tbin->low_water)) - tbin->low_water = tbin->ncached; - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - szind_t binind, bool zero, bool slow_path) -{ - void *ret; - tcache_bin_t *tbin; - bool tcache_success; - size_t usize JEMALLOC_CC_SILENCE_INIT(0); - - assert(binind < NBINS); - tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin, &tcache_success); - assert(tcache_success == (ret != NULL)); - if (unlikely(!tcache_success)) { - bool tcache_hard_success; - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); - - ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, - tbin, binind, &tcache_hard_success); - if (tcache_hard_success == false) - return (NULL); - } - - assert(ret); - /* - * Only compute usize if required. The checks in the following if - * statement are all static. - */ - if (config_prof || (slow_path && config_fill) || unlikely(zero)) { - usize = index2size(binind); - assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); - } - - if (likely(!zero)) { - if (slow_path && config_fill) { - if (unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, - &arena_bin_info[binind], false); - } else if (unlikely(opt_zero)) - memset(ret, 0, usize); - } - } else { - if (slow_path && config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &arena_bin_info[binind], - true); - } - memset(ret, 0, usize); - } - - if (config_stats) - tbin->tstats.nrequests++; - if (config_prof) - tcache->prof_accumbytes += usize; - tcache_event(tsd, tcache); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - szind_t binind, bool zero, bool slow_path) -{ - void *ret; - tcache_bin_t *tbin; - bool tcache_success; - - assert(binind < nhbins); - tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin, &tcache_success); - assert(tcache_success == (ret != NULL)); - if (unlikely(!tcache_success)) { - /* - * Only allocate one large object at a time, because it's quite - * expensive to create one and not use it. 
- */ - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); - - ret = large_malloc(tsd_tsdn(tsd), arena, s2u(size), zero); - if (ret == NULL) - return (NULL); - } else { - size_t usize JEMALLOC_CC_SILENCE_INIT(0); - - /* Only compute usize on demand */ - if (config_prof || (slow_path && config_fill) || - unlikely(zero)) { - usize = index2size(binind); - assert(usize <= tcache_maxclass); - } - - if (likely(!zero)) { - if (slow_path && config_fill) { - if (unlikely(opt_junk_alloc)) { - memset(ret, JEMALLOC_ALLOC_JUNK, - usize); - } else if (unlikely(opt_zero)) - memset(ret, 0, usize); - } - } else - memset(ret, 0, usize); - - if (config_stats) - tbin->tstats.nrequests++; - if (config_prof) - tcache->prof_accumbytes += usize; - } - - tcache_event(tsd, tcache); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, - bool slow_path) -{ - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; - - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); - - if (slow_path && config_fill && unlikely(opt_junk_free)) - arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); - - tbin = &tcache->tbins[binind]; - tbin_info = &tcache_bin_info[binind]; - if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_small(tsd, tcache, tbin, binind, - (tbin_info->ncached_max >> 1)); - } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->ncached++; - *(tbin->avail - tbin->ncached) = ptr; - - tcache_event(tsd, tcache); -} - -JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, - bool slow_path) -{ - szind_t binind; - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; - - assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); - - binind = size2index(size); - - if (slow_path && config_fill && unlikely(opt_junk_free)) - large_dalloc_junk(ptr, size); - - tbin = &tcache->tbins[binind]; - tbin_info = &tcache_bin_info[binind]; - if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_large(tsd, tbin, binind, - (tbin_info->ncached_max >> 1), tcache); - } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->ncached++; - *(tbin->avail - tbin->ncached) = ptr; - - tcache_event(tsd, tcache); -} - -JEMALLOC_ALWAYS_INLINE tcache_t * -tcaches_get(tsd_t *tsd, unsigned ind) -{ - tcaches_t *elm = &tcaches[ind]; - if (unlikely(elm->tcache == NULL)) { - elm->tcache = tcache_create(tsd_tsdn(tsd), arena_choose(tsd, - NULL)); - } - return (elm->tcache); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h new file mode 100644 index 0000000..ead90af --- /dev/null +++ b/include/jemalloc/internal/tcache_externs.h @@ -0,0 +1,47 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H +#define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H + +extern bool opt_tcache; +extern ssize_t opt_lg_tcache_max; + +extern tcache_bin_info_t *tcache_bin_info; + +/* + * Number of tcache bins. There are NBINS small-object bins, plus 0 or more + * large-object bins. + */ +extern unsigned nhbins; + +/* Maximum cached size class. */ +extern size_t tcache_maxclass; + +/* + * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and + * usable via the MALLOCX_TCACHE() flag. 
The automatic per thread tcaches are + * completely disjoint from this data structure. tcaches starts off as a sparse + * array, so it has no physical memory footprint until individual pages are + * touched. This allows the entire array to be allocated the first time an + * explicit tcache is created without a disproportionate impact on memory usage. + */ +extern tcaches_t *tcaches; + +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); +void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind, bool *tcache_success); +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, + unsigned rem, tcache_t *tcache); +void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, + arena_t *oldarena, arena_t *newarena); +tcache_t *tcache_get_hard(tsd_t *tsd); +tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); +void tcache_cleanup(tsd_t *tsd); +void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(tsdn_t *tsdn); + +#endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h new file mode 100644 index 0000000..e522d9e --- /dev/null +++ b/include/jemalloc/internal/tcache_inlines.h @@ -0,0 +1,306 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H +#define JEMALLOC_INTERNAL_TCACHE_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +void tcache_event(tsd_t *tsd, tcache_t *tcache); +void tcache_flush(void); +bool tcache_enabled_get(void); +tcache_t *tcache_get(tsd_t *tsd, bool create); +void tcache_enabled_set(bool enabled); +void *tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success); +void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + size_t size, szind_t ind, bool zero, bool slow_path); +void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + size_t size, szind_t ind, bool zero, bool slow_path); +void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, + szind_t binind, bool slow_path); +void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, + size_t size, bool slow_path); +tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) +JEMALLOC_INLINE void +tcache_flush(void) +{ + tsd_t *tsd; + + cassert(config_tcache); + + tsd = tsd_fetch(); + tcache_cleanup(tsd); +} + +JEMALLOC_INLINE bool +tcache_enabled_get(void) +{ + tsd_t *tsd; + tcache_enabled_t tcache_enabled; + + cassert(config_tcache); + + tsd = tsd_fetch(); + tcache_enabled = tsd_tcache_enabled_get(tsd); + if (tcache_enabled == tcache_enabled_default) { + tcache_enabled = (tcache_enabled_t)opt_tcache; + tsd_tcache_enabled_set(tsd, tcache_enabled); + } + + return ((bool)tcache_enabled); +} + +JEMALLOC_INLINE void +tcache_enabled_set(bool enabled) +{ + tsd_t *tsd; + tcache_enabled_t tcache_enabled; + + cassert(config_tcache); + + tsd = tsd_fetch(); + + tcache_enabled = (tcache_enabled_t)enabled; + tsd_tcache_enabled_set(tsd, tcache_enabled); + + if (!enabled) + tcache_cleanup(tsd); +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcache_get(tsd_t *tsd, bool create) +{ + tcache_t *tcache; + + if (!config_tcache) + return 
(NULL); + + tcache = tsd_tcache_get(tsd); + if (!create) + return (tcache); + if (unlikely(tcache == NULL) && tsd_nominal(tsd)) { + tcache = tcache_get_hard(tsd); + tsd_tcache_set(tsd, tcache); + } + + return (tcache); +} + +JEMALLOC_ALWAYS_INLINE void +tcache_event(tsd_t *tsd, tcache_t *tcache) +{ + + if (TCACHE_GC_INCR == 0) + return; + + if (unlikely(ticker_tick(&tcache->gc_ticker))) + tcache_event_hard(tsd, tcache); +} + +JEMALLOC_ALWAYS_INLINE void * +tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) +{ + void *ret; + + if (unlikely(tbin->ncached == 0)) { + tbin->low_water = -1; + *tcache_success = false; + return (NULL); + } + /* + * tcache_success (instead of ret) should be checked upon the return of + * this function. We avoid checking (ret == NULL) because there is + * never a null stored on the avail stack (which is unknown to the + * compiler), and eagerly checking ret would cause pipeline stall + * (waiting for the cacheline). + */ + *tcache_success = true; + ret = *(tbin->avail - tbin->ncached); + tbin->ncached--; + + if (unlikely((int)tbin->ncached < tbin->low_water)) + tbin->low_water = tbin->ncached; + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void * +tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + szind_t binind, bool zero, bool slow_path) +{ + void *ret; + tcache_bin_t *tbin; + bool tcache_success; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + + assert(binind < NBINS); + tbin = &tcache->tbins[binind]; + ret = tcache_alloc_easy(tbin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { + bool tcache_hard_success; + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, + tbin, binind, &tcache_hard_success); + if (tcache_hard_success == false) + return (NULL); + } + + assert(ret); + /* + * Only compute usize if required. The checks in the following if + * statement are all static. + */ + if (config_prof || (slow_path && config_fill) || unlikely(zero)) { + usize = index2size(binind); + assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); + } + + if (likely(!zero)) { + if (slow_path && config_fill) { + if (unlikely(opt_junk_alloc)) { + arena_alloc_junk_small(ret, + &arena_bin_info[binind], false); + } else if (unlikely(opt_zero)) + memset(ret, 0, usize); + } + } else { + if (slow_path && config_fill && unlikely(opt_junk_alloc)) { + arena_alloc_junk_small(ret, &arena_bin_info[binind], + true); + } + memset(ret, 0, usize); + } + + if (config_stats) + tbin->tstats.nrequests++; + if (config_prof) + tcache->prof_accumbytes += usize; + tcache_event(tsd, tcache); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void * +tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + szind_t binind, bool zero, bool slow_path) +{ + void *ret; + tcache_bin_t *tbin; + bool tcache_success; + + assert(binind < nhbins); + tbin = &tcache->tbins[binind]; + ret = tcache_alloc_easy(tbin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { + /* + * Only allocate one large object at a time, because it's quite + * expensive to create one and not use it. 
+ */ + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + ret = large_malloc(tsd_tsdn(tsd), arena, s2u(size), zero); + if (ret == NULL) + return (NULL); + } else { + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + + /* Only compute usize on demand */ + if (config_prof || (slow_path && config_fill) || + unlikely(zero)) { + usize = index2size(binind); + assert(usize <= tcache_maxclass); + } + + if (likely(!zero)) { + if (slow_path && config_fill) { + if (unlikely(opt_junk_alloc)) { + memset(ret, JEMALLOC_ALLOC_JUNK, + usize); + } else if (unlikely(opt_zero)) + memset(ret, 0, usize); + } + } else + memset(ret, 0, usize); + + if (config_stats) + tbin->tstats.nrequests++; + if (config_prof) + tcache->prof_accumbytes += usize; + } + + tcache_event(tsd, tcache); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, + bool slow_path) +{ + tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; + + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); + + if (slow_path && config_fill && unlikely(opt_junk_free)) + arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); + + tbin = &tcache->tbins[binind]; + tbin_info = &tcache_bin_info[binind]; + if (unlikely(tbin->ncached == tbin_info->ncached_max)) { + tcache_bin_flush_small(tsd, tcache, tbin, binind, + (tbin_info->ncached_max >> 1)); + } + assert(tbin->ncached < tbin_info->ncached_max); + tbin->ncached++; + *(tbin->avail - tbin->ncached) = ptr; + + tcache_event(tsd, tcache); +} + +JEMALLOC_ALWAYS_INLINE void +tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, + bool slow_path) +{ + szind_t binind; + tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; + + assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); + + binind = size2index(size); + + if (slow_path && config_fill && unlikely(opt_junk_free)) + large_dalloc_junk(ptr, size); + + tbin = &tcache->tbins[binind]; + tbin_info = &tcache_bin_info[binind]; + if (unlikely(tbin->ncached == tbin_info->ncached_max)) { + tcache_bin_flush_large(tsd, tbin, binind, + (tbin_info->ncached_max >> 1), tcache); + } + assert(tbin->ncached < tbin_info->ncached_max); + tbin->ncached++; + *(tbin->avail - tbin->ncached) = ptr; + + tcache_event(tsd, tcache); +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcaches_get(tsd_t *tsd, unsigned ind) +{ + tcaches_t *elm = &tcaches[ind]; + if (unlikely(elm->tcache == NULL)) { + elm->tcache = tcache_create(tsd_tsdn(tsd), arena_choose(tsd, + NULL)); + } + return (elm->tcache); +} +#endif + +#endif /* JEMALLOC_INTERNAL_TCACHE_INLINES_H */ diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h new file mode 100644 index 0000000..a2b28af --- /dev/null +++ b/include/jemalloc/internal/tcache_structs.h @@ -0,0 +1,55 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H +#define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H + +typedef enum { + tcache_enabled_false = 0, /* Enable cast to/from bool. */ + tcache_enabled_true = 1, + tcache_enabled_default = 2 +} tcache_enabled_t; + +/* + * Read-only information associated with each element of tcache_t's tbins array + * is stored separately, mainly to reduce memory usage. + */ +struct tcache_bin_info_s { + unsigned ncached_max; /* Upper limit on ncached. */ +}; + +struct tcache_bin_s { + tcache_bin_stats_t tstats; + int low_water; /* Min # cached since last GC. */ + unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). 
*/ + unsigned ncached; /* # of cached objects. */ + /* + * To make use of adjacent cacheline prefetch, the items in the avail + * stack go to higher addresses for newer allocations. avail points + * just above the available space, which means that + * avail[-ncached, ... -1] are available items and the lowest item will + * be allocated first. + */ + void **avail; /* Stack of available objects. */ +}; + +struct tcache_s { + ql_elm(tcache_t) link; /* Used for aggregating stats. */ + uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ + ticker_t gc_ticker; /* Drives incremental GC. */ + szind_t next_gc_bin; /* Next bin to GC. */ + tcache_bin_t tbins[1]; /* Dynamically sized. */ + /* + * The pointer stacks associated with tbins follow as a contiguous + * array. During tcache initialization, the avail pointer in each + * element of tbins is initialized to point to the proper offset within + * this array. + */ +}; + +/* Linkage for list of available (previously used) explicit tcache IDs. */ +struct tcaches_s { + union { + tcache_t *tcache; + tcaches_t *next; + }; +}; + +#endif /* JEMALLOC_INTERNAL_TCACHE_STRUCTS_H */ diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h new file mode 100644 index 0000000..c6ac767 --- /dev/null +++ b/include/jemalloc/internal/tcache_types.h @@ -0,0 +1,50 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H +#define JEMALLOC_INTERNAL_TCACHE_TYPES_H + +typedef struct tcache_bin_info_s tcache_bin_info_t; +typedef struct tcache_bin_s tcache_bin_t; +typedef struct tcache_s tcache_t; +typedef struct tcaches_s tcaches_t; + +/* + * tcache pointers close to NULL are used to encode state information that is + * used for two purposes: preventing thread caching on a per thread basis and + * cleaning up during thread shutdown. + */ +#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) +#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) +#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) +#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY + +/* + * Absolute minimum number of cache slots for each small bin. + */ +#define TCACHE_NSLOTS_SMALL_MIN 20 + +/* + * Absolute maximum number of cache slots for each small bin in the thread + * cache. This is an additional constraint beyond the cap of twice the + * number of regions per slab for this size class. + * + * This constant must be an even number. + */ +#define TCACHE_NSLOTS_SMALL_MAX 200 + +/* Number of cache slots for large size classes. */ +#define TCACHE_NSLOTS_LARGE 20 + +/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ +#define LG_TCACHE_MAXCLASS_DEFAULT 15 + +/* + * TCACHE_GC_SWEEP is the approximate number of allocation events between + * full GC sweeps. Integer rounding may cause the actual number to be + * slightly higher, since GC is performed incrementally. + */ +#define TCACHE_GC_SWEEP 8192 + +/* Number of tcache allocation/deallocation events between incremental GCs. */ +#define TCACHE_GC_INCR \ + ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ?
0 : 1)) + +#endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h deleted file mode 100644 index 4696e56..0000000 --- a/include/jemalloc/internal/ticker.h +++ /dev/null @@ -1,75 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct ticker_s ticker_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct ticker_s { - int32_t tick; - int32_t nticks; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void ticker_init(ticker_t *ticker, int32_t nticks); -void ticker_copy(ticker_t *ticker, const ticker_t *other); -int32_t ticker_read(const ticker_t *ticker); -bool ticker_ticks(ticker_t *ticker, int32_t nticks); -bool ticker_tick(ticker_t *ticker); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TICKER_C_)) -JEMALLOC_INLINE void -ticker_init(ticker_t *ticker, int32_t nticks) -{ - - ticker->tick = nticks; - ticker->nticks = nticks; -} - -JEMALLOC_INLINE void -ticker_copy(ticker_t *ticker, const ticker_t *other) -{ - - *ticker = *other; -} - -JEMALLOC_INLINE int32_t -ticker_read(const ticker_t *ticker) -{ - - return (ticker->tick); -} - -JEMALLOC_INLINE bool -ticker_ticks(ticker_t *ticker, int32_t nticks) -{ - - if (unlikely(ticker->tick < nticks)) { - ticker->tick = ticker->nticks; - return (true); - } - ticker->tick -= nticks; - return(false); -} - -JEMALLOC_INLINE bool -ticker_tick(ticker_t *ticker) -{ - - return (ticker_ticks(ticker, 1)); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/ticker_inlines.h b/include/jemalloc/internal/ticker_inlines.h new file mode 100644 index 0000000..42f37eb --- /dev/null +++ b/include/jemalloc/internal/ticker_inlines.h @@ -0,0 +1,55 @@ +#ifndef JEMALLOC_INTERNAL_TICKER_INLINES_H +#define JEMALLOC_INTERNAL_TICKER_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +void ticker_init(ticker_t *ticker, int32_t nticks); +void ticker_copy(ticker_t *ticker, const ticker_t *other); +int32_t ticker_read(const ticker_t *ticker); +bool ticker_ticks(ticker_t *ticker, int32_t nticks); +bool ticker_tick(ticker_t *ticker); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TICKER_C_)) +JEMALLOC_INLINE void +ticker_init(ticker_t *ticker, int32_t nticks) +{ + + ticker->tick = nticks; + ticker->nticks = nticks; +} + +JEMALLOC_INLINE void +ticker_copy(ticker_t *ticker, const ticker_t *other) +{ + + *ticker = *other; +} + +JEMALLOC_INLINE int32_t +ticker_read(const ticker_t *ticker) +{ + + return (ticker->tick); +} + +JEMALLOC_INLINE bool +ticker_ticks(ticker_t *ticker, int32_t nticks) +{ + + if (unlikely(ticker->tick < nticks)) { + ticker->tick = ticker->nticks; + return (true); + } + ticker->tick -= nticks; + return(false); +} + +JEMALLOC_INLINE bool +ticker_tick(ticker_t *ticker) +{ + + return (ticker_ticks(ticker, 1)); +} +#endif + +#endif /* JEMALLOC_INTERNAL_TICKER_INLINES_H */ diff --git a/include/jemalloc/internal/ticker_structs.h b/include/jemalloc/internal/ticker_structs.h new file mode 100644 index 
0000000..e30c4e2 --- /dev/null +++ b/include/jemalloc/internal/ticker_structs.h @@ -0,0 +1,9 @@ +#ifndef JEMALLOC_INTERNAL_TICKER_STRUCTS_H +#define JEMALLOC_INTERNAL_TICKER_STRUCTS_H + +struct ticker_s { + int32_t tick; + int32_t nticks; +}; + +#endif /* JEMALLOC_INTERNAL_TICKER_STRUCTS_H */ diff --git a/include/jemalloc/internal/ticker_types.h b/include/jemalloc/internal/ticker_types.h new file mode 100644 index 0000000..62d67f3 --- /dev/null +++ b/include/jemalloc/internal/ticker_types.h @@ -0,0 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_TICKER_TYPES_H +#define JEMALLOC_INTERNAL_TICKER_TYPES_H + +typedef struct ticker_s ticker_t; + +#endif /* JEMALLOC_INTERNAL_TICKER_TYPES_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h deleted file mode 100644 index c4f010a..0000000 --- a/include/jemalloc/internal/tsd.h +++ /dev/null @@ -1,811 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* Maximum number of malloc_tsd users with cleanup functions. */ -#define MALLOC_TSD_CLEANUPS_MAX 2 - -typedef bool (*malloc_tsd_cleanup_t)(void); - -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -typedef struct tsd_init_block_s tsd_init_block_t; -typedef struct tsd_init_head_s tsd_init_head_t; -#endif - -typedef struct tsd_s tsd_t; -typedef struct tsdn_s tsdn_t; - -#define TSDN_NULL ((tsdn_t *)0) - -typedef enum { - tsd_state_uninitialized, - tsd_state_nominal, - tsd_state_purgatory, - tsd_state_reincarnated -} tsd_state_t; - -/* - * TLS/TSD-agnostic macro-based implementation of thread-specific data. There - * are five macros that support (at least) three use cases: file-private, - * library-private, and library-private inlined. Following is an example - * library-private tsd variable: - * - * In example.h: - * typedef struct { - * int x; - * int y; - * } example_t; - * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) - * malloc_tsd_types(example_, example_t) - * malloc_tsd_protos(, example_, example_t) - * malloc_tsd_externs(example_, example_t) - * In example.c: - * malloc_tsd_data(, example_, example_t, EX_INITIALIZER) - * malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER, - * example_tsd_cleanup) - * - * The result is a set of generated functions, e.g.: - * - * bool example_tsd_boot(void) {...} - * bool example_tsd_booted_get(void) {...} - * example_t *example_tsd_get(bool init) {...} - * void example_tsd_set(example_t *val) {...} - * - * Note that all of the functions deal in terms of (a_type *) rather than - * (a_type) so that it is possible to support non-pointer types (unlike - * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is - * cast to (void *). This means that the cleanup function needs to cast the - * function argument to (a_type *), then dereference the resulting pointer to - * access fields, e.g. - * - * void - * example_tsd_cleanup(void *arg) - * { - * example_t *example = (example_t *)arg; - * - * example->x = 42; - * [...] - * if ([want the cleanup function to be called again]) - * example_tsd_set(example); - * } - * - * If example_tsd_set() is called within example_tsd_cleanup(), it will be - * called again. This is similar to how pthreads TSD destruction works, except - * that pthreads only calls the cleanup function again if the value was set to - * non-NULL. - */ - -/* malloc_tsd_types(). 
*/ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_types(a_name, a_type) -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_types(a_name, a_type) -#elif (defined(_WIN32)) -#define malloc_tsd_types(a_name, a_type) \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##tsd_wrapper_t; -#else -#define malloc_tsd_types(a_name, a_type) \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##tsd_wrapper_t; -#endif - -/* malloc_tsd_protos(). */ -#define malloc_tsd_protos(a_attr, a_name, a_type) \ -a_attr bool \ -a_name##tsd_boot0(void); \ -a_attr void \ -a_name##tsd_boot1(void); \ -a_attr bool \ -a_name##tsd_boot(void); \ -a_attr bool \ -a_name##tsd_booted_get(void); \ -a_attr a_type * \ -a_name##tsd_get(bool init); \ -a_attr void \ -a_name##tsd_set(a_type *val); - -/* malloc_tsd_externs(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##tsd_tls; \ -extern __thread bool a_name##tsd_initialized; \ -extern bool a_name##tsd_booted; -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##tsd_tls; \ -extern pthread_key_t a_name##tsd_tsd; \ -extern bool a_name##tsd_booted; -#elif (defined(_WIN32)) -#define malloc_tsd_externs(a_name, a_type) \ -extern DWORD a_name##tsd_tsd; \ -extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ -extern bool a_name##tsd_booted; -#else -#define malloc_tsd_externs(a_name, a_type) \ -extern pthread_key_t a_name##tsd_tsd; \ -extern tsd_init_head_t a_name##tsd_init_head; \ -extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ -extern bool a_name##tsd_booted; -#endif - -/* malloc_tsd_data(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##tsd_tls = a_initializer; \ -a_attr __thread bool JEMALLOC_TLS_MODEL \ - a_name##tsd_initialized = false; \ -a_attr bool a_name##tsd_booted = false; -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##tsd_tls = a_initializer; \ -a_attr pthread_key_t a_name##tsd_tsd; \ -a_attr bool a_name##tsd_booted = false; -#elif (defined(_WIN32)) -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr DWORD a_name##tsd_tsd; \ -a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ - false, \ - a_initializer \ -}; \ -a_attr bool a_name##tsd_booted = false; -#else -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr pthread_key_t a_name##tsd_tsd; \ -a_attr tsd_init_head_t a_name##tsd_init_head = { \ - ql_head_initializer(blocks), \ - MALLOC_MUTEX_INITIALIZER \ -}; \ -a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ - false, \ - a_initializer \ -}; \ -a_attr bool a_name##tsd_booted = false; -#endif - -/* malloc_tsd_funcs(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. 
*/ \ -a_attr bool \ -a_name##tsd_cleanup_wrapper(void) \ -{ \ - \ - if (a_name##tsd_initialized) { \ - a_name##tsd_initialized = false; \ - a_cleanup(&a_name##tsd_tls); \ - } \ - return (a_name##tsd_initialized); \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ - \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##tsd_cleanup_wrapper); \ - } \ - a_name##tsd_booted = true; \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_boot1(void) \ -{ \ - \ - /* Do nothing. */ \ -} \ -a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - \ - return (a_name##tsd_boot0()); \ -} \ -a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ - \ - return (a_name##tsd_booted); \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ - \ - return (false); \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(bool init) \ -{ \ - \ - assert(a_name##tsd_booted); \ - return (&a_name##tsd_tls); \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ - \ - assert(a_name##tsd_booted); \ - if (likely(&a_name##tsd_tls != val)) \ - a_name##tsd_tls = (*val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ - a_name##tsd_initialized = true; \ -} -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ - \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \ - 0) \ - return (true); \ - } \ - a_name##tsd_booted = true; \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_boot1(void) \ -{ \ - \ - /* Do nothing. */ \ -} \ -a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - \ - return (a_name##tsd_boot0()); \ -} \ -a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ - \ - return (a_name##tsd_booted); \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ - \ - return (false); \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(bool init) \ -{ \ - \ - assert(a_name##tsd_booted); \ - return (&a_name##tsd_tls); \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ - \ - assert(a_name##tsd_booted); \ - if (likely(&a_name##tsd_tls != val)) \ - a_name##tsd_tls = (*val); \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)(&a_name##tsd_tls))) { \ - malloc_write(": Error" \ - " setting TSD for "#a_name"\n"); \ - if (opt_abort) \ - abort(); \ - } \ - } \ -} -#elif (defined(_WIN32)) -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##tsd_cleanup_wrapper(void) \ -{ \ - DWORD error = GetLastError(); \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - TlsGetValue(a_name##tsd_tsd); \ - SetLastError(error); \ - \ - if (wrapper == NULL) \ - return (false); \ - if (a_cleanup != malloc_tsd_no_cleanup && \ - wrapper->initialized) { \ - wrapper->initialized = false; \ - a_cleanup(&wrapper->val); \ - if (wrapper->initialized) { \ - /* Trigger another cleanup round. 
*/ \ - return (true); \ - } \ - } \ - malloc_tsd_dalloc(wrapper); \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ -{ \ - \ - if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ - malloc_write(": Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ -} \ -a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(bool init) \ -{ \ - DWORD error = GetLastError(); \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - TlsGetValue(a_name##tsd_tsd); \ - SetLastError(error); \ - \ - if (init && unlikely(wrapper == NULL)) { \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write(": Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } else { \ - wrapper->initialized = false; \ - wrapper->val = a_initializer; \ - } \ - a_name##tsd_wrapper_set(wrapper); \ - } \ - return (wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ - \ - a_name##tsd_tsd = TlsAlloc(); \ - if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \ - return (true); \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##tsd_cleanup_wrapper); \ - } \ - a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ - a_name##tsd_booted = true; \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_boot1(void) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write(": Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ - memcpy(wrapper, &a_name##tsd_boot_wrapper, \ - sizeof(a_name##tsd_wrapper_t)); \ - a_name##tsd_wrapper_set(wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - \ - if (a_name##tsd_boot0()) \ - return (true); \ - a_name##tsd_boot1(); \ - return (false); \ -} \ -a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ - \ - return (a_name##tsd_booted); \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ - \ - return (true); \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(bool init) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(init); \ - if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ - return (NULL); \ - return (&wrapper->val); \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(true); \ - if (likely(&wrapper->val != val)) \ - wrapper->val = *(val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ - wrapper->initialized = true; \ -} -#else -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr void \ -a_name##tsd_cleanup_wrapper(void *arg) \ -{ \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)arg; \ - \ - if (a_cleanup != malloc_tsd_no_cleanup && \ - wrapper->initialized) { \ - wrapper->initialized = false; \ - a_cleanup(&wrapper->val); \ - if (wrapper->initialized) { \ - /* Trigger another cleanup round. 
*/ \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)wrapper)) { \ - malloc_write(": Error" \ - " setting TSD for "#a_name"\n"); \ - if (opt_abort) \ - abort(); \ - } \ - return; \ - } \ - } \ - malloc_tsd_dalloc(wrapper); \ -} \ -a_attr void \ -a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ -{ \ - \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)wrapper)) { \ - malloc_write(": Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ -} \ -a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(bool init) \ -{ \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - pthread_getspecific(a_name##tsd_tsd); \ - \ - if (init && unlikely(wrapper == NULL)) { \ - tsd_init_block_t block; \ - wrapper = (a_name##tsd_wrapper_t *) \ - tsd_init_check_recursion(&a_name##tsd_init_head, \ - &block); \ - if (wrapper) \ - return (wrapper); \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - block.data = (void *)wrapper; \ - if (wrapper == NULL) { \ - malloc_write(": Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } else { \ - wrapper->initialized = false; \ - wrapper->val = a_initializer; \ - } \ - a_name##tsd_wrapper_set(wrapper); \ - tsd_init_finish(&a_name##tsd_init_head, &block); \ - } \ - return (wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ - \ - if (pthread_key_create(&a_name##tsd_tsd, \ - a_name##tsd_cleanup_wrapper) != 0) \ - return (true); \ - a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ - a_name##tsd_booted = true; \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_boot1(void) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write(": Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ - memcpy(wrapper, &a_name##tsd_boot_wrapper, \ - sizeof(a_name##tsd_wrapper_t)); \ - a_name##tsd_wrapper_set(wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - \ - if (a_name##tsd_boot0()) \ - return (true); \ - a_name##tsd_boot1(); \ - return (false); \ -} \ -a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ - \ - return (a_name##tsd_booted); \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ - \ - return (true); \ -} \ -/* Get/set. 
*/ \ -a_attr a_type * \ -a_name##tsd_get(bool init) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(init); \ - if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ - return (NULL); \ - return (&wrapper->val); \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(true); \ - if (likely(&wrapper->val != val)) \ - wrapper->val = *(val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ - wrapper->initialized = true; \ -} -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -struct tsd_init_block_s { - ql_elm(tsd_init_block_t) link; - pthread_t thread; - void *data; -}; -struct tsd_init_head_s { - ql_head(tsd_init_block_t) blocks; - malloc_mutex_t lock; -}; -#endif - -#define MALLOC_TSD \ -/* O(name, type, cleanup) */ \ - O(tcache, tcache_t *, yes) \ - O(thread_allocated, uint64_t, no) \ - O(thread_deallocated, uint64_t, no) \ - O(prof_tdata, prof_tdata_t *, yes) \ - O(iarena, arena_t *, yes) \ - O(arena, arena_t *, yes) \ - O(arenas_tdata, arena_tdata_t *, yes) \ - O(narenas_tdata, unsigned, no) \ - O(arenas_tdata_bypass, bool, no) \ - O(tcache_enabled, tcache_enabled_t, no) \ - O(rtree_ctx, rtree_ctx_t, no) \ - O(witnesses, witness_list_t, yes) \ - O(rtree_elm_witnesses, rtree_elm_witness_tsd_t,no) \ - O(witness_fork, bool, no) \ - -#define TSD_INITIALIZER { \ - tsd_state_uninitialized, \ - NULL, \ - 0, \ - 0, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - 0, \ - false, \ - tcache_enabled_default, \ - RTREE_CTX_INITIALIZER, \ - ql_head_initializer(witnesses), \ - RTREE_ELM_WITNESS_TSD_INITIALIZER, \ - false \ -} - -struct tsd_s { - tsd_state_t state; -#define O(n, t, c) \ - t n; -MALLOC_TSD -#undef O -}; - -/* - * Wrapper around tsd_t that makes it possible to avoid implicit conversion - * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be - * explicitly converted to tsd_t, which is non-nullable. 
- */ -struct tsdn_s { - tsd_t tsd; -}; - -static const tsd_t tsd_initializer = TSD_INITIALIZER; - -malloc_tsd_types(, tsd_t) - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *malloc_tsd_malloc(size_t size); -void malloc_tsd_dalloc(void *wrapper); -void malloc_tsd_no_cleanup(void *arg); -void malloc_tsd_cleanup_register(bool (*f)(void)); -tsd_t *malloc_tsd_boot0(void); -void malloc_tsd_boot1(void); -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -void *tsd_init_check_recursion(tsd_init_head_t *head, - tsd_init_block_t *block); -void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); -#endif -void tsd_cleanup(void *arg); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) - -tsd_t *tsd_fetch_impl(bool init); -tsd_t *tsd_fetch(void); -tsdn_t *tsd_tsdn(tsd_t *tsd); -bool tsd_nominal(tsd_t *tsd); -#define O(n, t, c) \ -t *tsd_##n##p_get(tsd_t *tsd); \ -t tsd_##n##_get(tsd_t *tsd); \ -void tsd_##n##_set(tsd_t *tsd, t n); -MALLOC_TSD -#undef O -tsdn_t *tsdn_fetch(void); -bool tsdn_null(const tsdn_t *tsdn); -tsd_t *tsdn_tsd(tsdn_t *tsdn); -rtree_ctx_t *tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) -malloc_tsd_externs(, tsd_t) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch_impl(bool init) -{ - tsd_t *tsd = tsd_get(init); - - if (!init && tsd_get_allocates() && tsd == NULL) - return (NULL); - assert(tsd != NULL); - - if (unlikely(tsd->state != tsd_state_nominal)) { - if (tsd->state == tsd_state_uninitialized) { - tsd->state = tsd_state_nominal; - /* Trigger cleanup handler registration. */ - tsd_set(tsd); - } else if (tsd->state == tsd_state_purgatory) { - tsd->state = tsd_state_reincarnated; - tsd_set(tsd); - } else - assert(tsd->state == tsd_state_reincarnated); - } - - return (tsd); -} - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch(void) -{ - - return (tsd_fetch_impl(true)); -} - -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsd_tsdn(tsd_t *tsd) -{ - - return ((tsdn_t *)tsd); -} - -JEMALLOC_INLINE bool -tsd_nominal(tsd_t *tsd) -{ - - return (tsd->state == tsd_state_nominal); -} - -#define O(n, t, c) \ -JEMALLOC_ALWAYS_INLINE t * \ -tsd_##n##p_get(tsd_t *tsd) \ -{ \ - \ - return (&tsd->n); \ -} \ - \ -JEMALLOC_ALWAYS_INLINE t \ -tsd_##n##_get(tsd_t *tsd) \ -{ \ - \ - return (*tsd_##n##p_get(tsd)); \ -} \ - \ -JEMALLOC_ALWAYS_INLINE void \ -tsd_##n##_set(tsd_t *tsd, t n) \ -{ \ - \ - assert(tsd->state == tsd_state_nominal); \ - tsd->n = n; \ -} -MALLOC_TSD -#undef O - -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsdn_fetch(void) -{ - - if (!tsd_booted_get()) - return (NULL); - - return (tsd_tsdn(tsd_fetch_impl(false))); -} - -JEMALLOC_ALWAYS_INLINE bool -tsdn_null(const tsdn_t *tsdn) -{ - - return (tsdn == NULL); -} - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsdn_tsd(tsdn_t *tsdn) -{ - - assert(!tsdn_null(tsdn)); - - return (&tsdn->tsd); -} - -JEMALLOC_ALWAYS_INLINE rtree_ctx_t * -tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) -{ - - /* - * If tsd cannot be accessed, initialize the fallback rtree_ctx and - * return a pointer to it. 
- */ - if (unlikely(tsdn_null(tsdn))) { - static const rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; - memcpy(fallback, &rtree_ctx, sizeof(rtree_ctx_t)); - return (fallback); - } - return (tsd_rtree_ctxp_get(tsdn_tsd(tsdn))); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/tsd_externs.h b/include/jemalloc/internal/tsd_externs.h new file mode 100644 index 0000000..87ebaf2 --- /dev/null +++ b/include/jemalloc/internal/tsd_externs.h @@ -0,0 +1,18 @@ +#ifndef JEMALLOC_INTERNAL_TSD_EXTERNS_H +#define JEMALLOC_INTERNAL_TSD_EXTERNS_H + +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +void malloc_tsd_no_cleanup(void *arg); +void malloc_tsd_cleanup_register(bool (*f)(void)); +tsd_t *malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +void *tsd_init_check_recursion(tsd_init_head_t *head, + tsd_init_block_t *block); +void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); +#endif +void tsd_cleanup(void *arg); + +#endif /* JEMALLOC_INTERNAL_TSD_EXTERNS_H */ diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h new file mode 100644 index 0000000..ad915d1 --- /dev/null +++ b/include/jemalloc/internal/tsd_inlines.h @@ -0,0 +1,140 @@ +#ifndef JEMALLOC_INTERNAL_TSD_INLINES_H +#define JEMALLOC_INTERNAL_TSD_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) + +tsd_t *tsd_fetch_impl(bool init); +tsd_t *tsd_fetch(void); +tsdn_t *tsd_tsdn(tsd_t *tsd); +bool tsd_nominal(tsd_t *tsd); +#define O(n, t, c) \ +t *tsd_##n##p_get(tsd_t *tsd); \ +t tsd_##n##_get(tsd_t *tsd); \ +void tsd_##n##_set(tsd_t *tsd, t n); +MALLOC_TSD +#undef O +tsdn_t *tsdn_fetch(void); +bool tsdn_null(const tsdn_t *tsdn); +tsd_t *tsdn_tsd(tsdn_t *tsdn); +rtree_ctx_t *tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) +malloc_tsd_externs(, tsd_t) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch_impl(bool init) +{ + tsd_t *tsd = tsd_get(init); + + if (!init && tsd_get_allocates() && tsd == NULL) + return (NULL); + assert(tsd != NULL); + + if (unlikely(tsd->state != tsd_state_nominal)) { + if (tsd->state == tsd_state_uninitialized) { + tsd->state = tsd_state_nominal; + /* Trigger cleanup handler registration. 
*/ + tsd_set(tsd); + } else if (tsd->state == tsd_state_purgatory) { + tsd->state = tsd_state_reincarnated; + tsd_set(tsd); + } else + assert(tsd->state == tsd_state_reincarnated); + } + + return (tsd); +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) +{ + + return (tsd_fetch_impl(true)); +} + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsd_tsdn(tsd_t *tsd) +{ + + return ((tsdn_t *)tsd); +} + +JEMALLOC_INLINE bool +tsd_nominal(tsd_t *tsd) +{ + + return (tsd->state == tsd_state_nominal); +} + +#define O(n, t, c) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get(tsd_t *tsd) \ +{ \ + \ + return (&tsd->n); \ +} \ + \ +JEMALLOC_ALWAYS_INLINE t \ +tsd_##n##_get(tsd_t *tsd) \ +{ \ + \ + return (*tsd_##n##p_get(tsd)); \ +} \ + \ +JEMALLOC_ALWAYS_INLINE void \ +tsd_##n##_set(tsd_t *tsd, t n) \ +{ \ + \ + assert(tsd->state == tsd_state_nominal); \ + tsd->n = n; \ +} +MALLOC_TSD +#undef O + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsdn_fetch(void) +{ + + if (!tsd_booted_get()) + return (NULL); + + return (tsd_tsdn(tsd_fetch_impl(false))); +} + +JEMALLOC_ALWAYS_INLINE bool +tsdn_null(const tsdn_t *tsdn) +{ + + return (tsdn == NULL); +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsdn_tsd(tsdn_t *tsdn) +{ + + assert(!tsdn_null(tsdn)); + + return (&tsdn->tsd); +} + +JEMALLOC_ALWAYS_INLINE rtree_ctx_t * +tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) +{ + + /* + * If tsd cannot be accessed, initialize the fallback rtree_ctx and + * return a pointer to it. + */ + if (unlikely(tsdn_null(tsdn))) { + static const rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; + memcpy(fallback, &rtree_ctx, sizeof(rtree_ctx_t)); + return (fallback); + } + return (tsd_rtree_ctxp_get(tsdn_tsd(tsdn))); +} +#endif + +#endif /* JEMALLOC_INTERNAL_TSD_INLINES_H */ diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h new file mode 100644 index 0000000..8d94c5b --- /dev/null +++ b/include/jemalloc/internal/tsd_structs.h @@ -0,0 +1,73 @@ +#ifndef JEMALLOC_INTERNAL_TSD_STRUCTS_H +#define JEMALLOC_INTERNAL_TSD_STRUCTS_H + +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +struct tsd_init_block_s { + ql_elm(tsd_init_block_t) link; + pthread_t thread; + void *data; +}; +struct tsd_init_head_s { + ql_head(tsd_init_block_t) blocks; + malloc_mutex_t lock; +}; +#endif + +#define MALLOC_TSD \ +/* O(name, type, cleanup) */ \ + O(tcache, tcache_t *, yes) \ + O(thread_allocated, uint64_t, no) \ + O(thread_deallocated, uint64_t, no) \ + O(prof_tdata, prof_tdata_t *, yes) \ + O(iarena, arena_t *, yes) \ + O(arena, arena_t *, yes) \ + O(arenas_tdata, arena_tdata_t *, yes) \ + O(narenas_tdata, unsigned, no) \ + O(arenas_tdata_bypass, bool, no) \ + O(tcache_enabled, tcache_enabled_t, no) \ + O(rtree_ctx, rtree_ctx_t, no) \ + O(witnesses, witness_list_t, yes) \ + O(rtree_elm_witnesses, rtree_elm_witness_tsd_t,no) \ + O(witness_fork, bool, no) \ + +#define TSD_INITIALIZER { \ + tsd_state_uninitialized, \ + NULL, \ + 0, \ + 0, \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + 0, \ + false, \ + tcache_enabled_default, \ + RTREE_CTX_INITIALIZER, \ + ql_head_initializer(witnesses), \ + RTREE_ELM_WITNESS_TSD_INITIALIZER, \ + false \ +} + +struct tsd_s { + tsd_state_t state; +#define O(n, t, c) \ + t n; +MALLOC_TSD +#undef O +}; + +/* + * Wrapper around tsd_t that makes it possible to avoid implicit conversion + * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be + * explicitly converted to tsd_t, which is non-nullable. 
+ */ +struct tsdn_s { + tsd_t tsd; +}; + +static const tsd_t tsd_initializer = TSD_INITIALIZER; + +malloc_tsd_types(, tsd_t) + +#endif /* JEMALLOC_INTERNAL_TSD_STRUCTS_H */ diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h new file mode 100644 index 0000000..b48eaec --- /dev/null +++ b/include/jemalloc/internal/tsd_types.h @@ -0,0 +1,579 @@ +#ifndef JEMALLOC_INTERNAL_TSD_TYPES_H +#define JEMALLOC_INTERNAL_TSD_TYPES_H + +/* Maximum number of malloc_tsd users with cleanup functions. */ +#define MALLOC_TSD_CLEANUPS_MAX 2 + +typedef bool (*malloc_tsd_cleanup_t)(void); + +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +typedef struct tsd_init_block_s tsd_init_block_t; +typedef struct tsd_init_head_s tsd_init_head_t; +#endif + +typedef struct tsd_s tsd_t; +typedef struct tsdn_s tsdn_t; + +#define TSDN_NULL ((tsdn_t *)0) + +typedef enum { + tsd_state_uninitialized, + tsd_state_nominal, + tsd_state_purgatory, + tsd_state_reincarnated +} tsd_state_t; + +/* + * TLS/TSD-agnostic macro-based implementation of thread-specific data. There + * are five macros that support (at least) three use cases: file-private, + * library-private, and library-private inlined. Following is an example + * library-private tsd variable: + * + * In example.h: + * typedef struct { + * int x; + * int y; + * } example_t; + * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) + * malloc_tsd_types(example_, example_t) + * malloc_tsd_protos(, example_, example_t) + * malloc_tsd_externs(example_, example_t) + * In example.c: + * malloc_tsd_data(, example_, example_t, EX_INITIALIZER) + * malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER, + * example_tsd_cleanup) + * + * The result is a set of generated functions, e.g.: + * + * bool example_tsd_boot(void) {...} + * bool example_tsd_booted_get(void) {...} + * example_t *example_tsd_get(bool init) {...} + * void example_tsd_set(example_t *val) {...} + * + * Note that all of the functions deal in terms of (a_type *) rather than + * (a_type) so that it is possible to support non-pointer types (unlike + * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is + * cast to (void *). This means that the cleanup function needs to cast the + * function argument to (a_type *), then dereference the resulting pointer to + * access fields, e.g. + * + * void + * example_tsd_cleanup(void *arg) + * { + * example_t *example = (example_t *)arg; + * + * example->x = 42; + * [...] + * if ([want the cleanup function to be called again]) + * example_tsd_set(example); + * } + * + * If example_tsd_set() is called within example_tsd_cleanup(), it will be + * called again. This is similar to how pthreads TSD destruction works, except + * that pthreads only calls the cleanup function again if the value was set to + * non-NULL. + */ + +/* malloc_tsd_types(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_types(a_name, a_type) +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_types(a_name, a_type) +#elif (defined(_WIN32)) +#define malloc_tsd_types(a_name, a_type) \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##tsd_wrapper_t; +#else +#define malloc_tsd_types(a_name, a_type) \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##tsd_wrapper_t; +#endif + +/* malloc_tsd_protos(). 
*/ +#define malloc_tsd_protos(a_attr, a_name, a_type) \ +a_attr bool \ +a_name##tsd_boot0(void); \ +a_attr void \ +a_name##tsd_boot1(void); \ +a_attr bool \ +a_name##tsd_boot(void); \ +a_attr bool \ +a_name##tsd_booted_get(void); \ +a_attr a_type * \ +a_name##tsd_get(bool init); \ +a_attr void \ +a_name##tsd_set(a_type *val); + +/* malloc_tsd_externs(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_externs(a_name, a_type) \ +extern __thread a_type a_name##tsd_tls; \ +extern __thread bool a_name##tsd_initialized; \ +extern bool a_name##tsd_booted; +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_externs(a_name, a_type) \ +extern __thread a_type a_name##tsd_tls; \ +extern pthread_key_t a_name##tsd_tsd; \ +extern bool a_name##tsd_booted; +#elif (defined(_WIN32)) +#define malloc_tsd_externs(a_name, a_type) \ +extern DWORD a_name##tsd_tsd; \ +extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ +extern bool a_name##tsd_booted; +#else +#define malloc_tsd_externs(a_name, a_type) \ +extern pthread_key_t a_name##tsd_tsd; \ +extern tsd_init_head_t a_name##tsd_init_head; \ +extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ +extern bool a_name##tsd_booted; +#endif + +/* malloc_tsd_data(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr __thread a_type JEMALLOC_TLS_MODEL \ + a_name##tsd_tls = a_initializer; \ +a_attr __thread bool JEMALLOC_TLS_MODEL \ + a_name##tsd_initialized = false; \ +a_attr bool a_name##tsd_booted = false; +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr __thread a_type JEMALLOC_TLS_MODEL \ + a_name##tsd_tls = a_initializer; \ +a_attr pthread_key_t a_name##tsd_tsd; \ +a_attr bool a_name##tsd_booted = false; +#elif (defined(_WIN32)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr DWORD a_name##tsd_tsd; \ +a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ + false, \ + a_initializer \ +}; \ +a_attr bool a_name##tsd_booted = false; +#else +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr pthread_key_t a_name##tsd_tsd; \ +a_attr tsd_init_head_t a_name##tsd_init_head = { \ + ql_head_initializer(blocks), \ + MALLOC_MUTEX_INITIALIZER \ +}; \ +a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ + false, \ + a_initializer \ +}; \ +a_attr bool a_name##tsd_booted = false; +#endif + +/* malloc_tsd_funcs(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##tsd_cleanup_wrapper(void) \ +{ \ + \ + if (a_name##tsd_initialized) { \ + a_name##tsd_initialized = false; \ + a_cleanup(&a_name##tsd_tls); \ + } \ + return (a_name##tsd_initialized); \ +} \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##tsd_cleanup_wrapper); \ + } \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + \ + /* Do nothing. */ \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + return (a_name##tsd_boot0()); \ +} \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ +/* Get/set. 
*/ \ +a_attr a_type * \ +a_name##tsd_get(bool init) \ +{ \ + \ + assert(a_name##tsd_booted); \ + return (&a_name##tsd_tls); \ +} \ +a_attr void \ +a_name##tsd_set(a_type *val) \ +{ \ + \ + assert(a_name##tsd_booted); \ + if (likely(&a_name##tsd_tls != val)) \ + a_name##tsd_tls = (*val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + a_name##tsd_initialized = true; \ +} +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \ + 0) \ + return (true); \ + } \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + \ + /* Do nothing. */ \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + return (a_name##tsd_boot0()); \ +} \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_type * \ +a_name##tsd_get(bool init) \ +{ \ + \ + assert(a_name##tsd_booted); \ + return (&a_name##tsd_tls); \ +} \ +a_attr void \ +a_name##tsd_set(a_type *val) \ +{ \ + \ + assert(a_name##tsd_booted); \ + if (likely(&a_name##tsd_tls != val)) \ + a_name##tsd_tls = (*val); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + if (pthread_setspecific(a_name##tsd_tsd, \ + (void *)(&a_name##tsd_tls))) { \ + malloc_write(": Error" \ + " setting TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + } \ +} +#elif (defined(_WIN32)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##tsd_cleanup_wrapper(void) \ +{ \ + DWORD error = GetLastError(); \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + TlsGetValue(a_name##tsd_tsd); \ + SetLastError(error); \ + \ + if (wrapper == NULL) \ + return (false); \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + wrapper->initialized = false; \ + a_cleanup(&wrapper->val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. 
*/ \ + return (true); \ + } \ + } \ + malloc_tsd_dalloc(wrapper); \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ +{ \ + \ + if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ + malloc_write(": Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ +} \ +a_attr a_name##tsd_wrapper_t * \ +a_name##tsd_wrapper_get(bool init) \ +{ \ + DWORD error = GetLastError(); \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + TlsGetValue(a_name##tsd_tsd); \ + SetLastError(error); \ + \ + if (init && unlikely(wrapper == NULL)) { \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write(": Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } else { \ + wrapper->initialized = false; \ + wrapper->val = a_initializer; \ + } \ + a_name##tsd_wrapper_set(wrapper); \ + } \ + return (wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + a_name##tsd_tsd = TlsAlloc(); \ + if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \ + return (true); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##tsd_cleanup_wrapper); \ + } \ + a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write(": Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + memcpy(wrapper, &a_name##tsd_boot_wrapper, \ + sizeof(a_name##tsd_wrapper_t)); \ + a_name##tsd_wrapper_set(wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + if (a_name##tsd_boot0()) \ + return (true); \ + a_name##tsd_boot1(); \ + return (false); \ +} \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ +/* Get/set. */ \ +a_attr a_type * \ +a_name##tsd_get(bool init) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##tsd_set(a_type *val) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(true); \ + if (likely(&wrapper->val != val)) \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} +#else +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr void \ +a_name##tsd_cleanup_wrapper(void *arg) \ +{ \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)arg; \ + \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + wrapper->initialized = false; \ + a_cleanup(&wrapper->val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. 
*/ \ + if (pthread_setspecific(a_name##tsd_tsd, \ + (void *)wrapper)) { \ + malloc_write(": Error" \ + " setting TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + return; \ + } \ + } \ + malloc_tsd_dalloc(wrapper); \ +} \ +a_attr void \ +a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ +{ \ + \ + if (pthread_setspecific(a_name##tsd_tsd, \ + (void *)wrapper)) { \ + malloc_write(": Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ +} \ +a_attr a_name##tsd_wrapper_t * \ +a_name##tsd_wrapper_get(bool init) \ +{ \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + pthread_getspecific(a_name##tsd_tsd); \ + \ + if (init && unlikely(wrapper == NULL)) { \ + tsd_init_block_t block; \ + wrapper = (a_name##tsd_wrapper_t *) \ + tsd_init_check_recursion(&a_name##tsd_init_head, \ + &block); \ + if (wrapper) \ + return (wrapper); \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + block.data = (void *)wrapper; \ + if (wrapper == NULL) { \ + malloc_write(": Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } else { \ + wrapper->initialized = false; \ + wrapper->val = a_initializer; \ + } \ + a_name##tsd_wrapper_set(wrapper); \ + tsd_init_finish(&a_name##tsd_init_head, &block); \ + } \ + return (wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + if (pthread_key_create(&a_name##tsd_tsd, \ + a_name##tsd_cleanup_wrapper) != 0) \ + return (true); \ + a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write(": Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + memcpy(wrapper, &a_name##tsd_boot_wrapper, \ + sizeof(a_name##tsd_wrapper_t)); \ + a_name##tsd_wrapper_set(wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + if (a_name##tsd_boot0()) \ + return (true); \ + a_name##tsd_boot1(); \ + return (false); \ +} \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ +/* Get/set. 
*/ \ +a_attr a_type * \ +a_name##tsd_get(bool init) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##tsd_set(a_type *val) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(true); \ + if (likely(&wrapper->val != val)) \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} +#endif + +#endif /* JEMALLOC_INTERNAL_TSD_TYPES_H */ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h deleted file mode 100644 index 592806d..0000000 --- a/include/jemalloc/internal/util.h +++ /dev/null @@ -1,346 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#ifdef _WIN32 -# ifdef _WIN64 -# define FMT64_PREFIX "ll" -# define FMTPTR_PREFIX "ll" -# else -# define FMT64_PREFIX "ll" -# define FMTPTR_PREFIX "" -# endif -# define FMTd32 "d" -# define FMTu32 "u" -# define FMTx32 "x" -# define FMTd64 FMT64_PREFIX "d" -# define FMTu64 FMT64_PREFIX "u" -# define FMTx64 FMT64_PREFIX "x" -# define FMTdPTR FMTPTR_PREFIX "d" -# define FMTuPTR FMTPTR_PREFIX "u" -# define FMTxPTR FMTPTR_PREFIX "x" -#else -# include -# define FMTd32 PRId32 -# define FMTu32 PRIu32 -# define FMTx32 PRIx32 -# define FMTd64 PRId64 -# define FMTu64 PRIu64 -# define FMTx64 PRIx64 -# define FMTdPTR PRIdPTR -# define FMTuPTR PRIuPTR -# define FMTxPTR PRIxPTR -#endif - -/* Size of stack-allocated buffer passed to buferror(). */ -#define BUFERROR_BUF 64 - -/* - * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be - * large enough for all possible uses within jemalloc. - */ -#define MALLOC_PRINTF_BUFSIZE 4096 - -/* Junk fill patterns. */ -#ifndef JEMALLOC_ALLOC_JUNK -# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) -#endif -#ifndef JEMALLOC_FREE_JUNK -# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) -#endif - -/* - * Wrap a cpp argument that contains commas such that it isn't broken up into - * multiple arguments. - */ -#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ - -/* cpp macro definition stringification. */ -#define STRINGIFY_HELPER(x) #x -#define STRINGIFY(x) STRINGIFY_HELPER(x) - -/* - * Silence compiler warnings due to uninitialized values. This is used - * wherever the compiler fails to recognize that the variable is never used - * uninitialized. - */ -#ifdef JEMALLOC_CC_SILENCE -# define JEMALLOC_CC_SILENCE_INIT(v) = v -#else -# define JEMALLOC_CC_SILENCE_INIT(v) -#endif - -#ifdef __GNUC__ -# define likely(x) __builtin_expect(!!(x), 1) -# define unlikely(x) __builtin_expect(!!(x), 0) -#else -# define likely(x) !!(x) -# define unlikely(x) !!(x) -#endif - -#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) -# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure -#endif - -#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() - -#include "jemalloc/internal/assert.h" - -/* Use to assert a particular configuration, e.g., cassert(config_debug). 
*/ -#define cassert(c) do { \ - if (unlikely(!(c))) \ - not_reached(); \ -} while (0) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -int buferror(int err, char *buf, size_t buflen); -uintmax_t malloc_strtoumax(const char *restrict nptr, - char **restrict endptr, int base); -void malloc_write(const char *s); - -/* - * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating - * point math. - */ -size_t malloc_vsnprintf(char *str, size_t size, const char *format, - va_list ap); -size_t malloc_snprintf(char *str, size_t size, const char *format, ...) - JEMALLOC_FORMAT_PRINTF(3, 4); -void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap); -void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, - const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); -void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -unsigned ffs_llu(unsigned long long bitmap); -unsigned ffs_lu(unsigned long bitmap); -unsigned ffs_u(unsigned bitmap); -unsigned ffs_zu(size_t bitmap); -unsigned ffs_u64(uint64_t bitmap); -unsigned ffs_u32(uint32_t bitmap); -uint64_t pow2_ceil_u64(uint64_t x); -uint32_t pow2_ceil_u32(uint32_t x); -size_t pow2_ceil_zu(size_t x); -unsigned lg_floor(size_t x); -void set_errno(int errnum); -int get_errno(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) - -/* Sanity check. 
*/ -#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ - || !defined(JEMALLOC_INTERNAL_FFS) -# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure -#endif - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_llu(unsigned long long bitmap) -{ - - return (JEMALLOC_INTERNAL_FFSLL(bitmap)); -} - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_lu(unsigned long bitmap) -{ - - return (JEMALLOC_INTERNAL_FFSL(bitmap)); -} - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_u(unsigned bitmap) -{ - - return (JEMALLOC_INTERNAL_FFS(bitmap)); -} - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_zu(size_t bitmap) -{ - -#if LG_SIZEOF_PTR == LG_SIZEOF_INT - return (ffs_u(bitmap)); -#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG - return (ffs_lu(bitmap)); -#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG - return (ffs_llu(bitmap)); -#else -#error No implementation for size_t ffs() -#endif -} - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_u64(uint64_t bitmap) -{ - -#if LG_SIZEOF_LONG == 3 - return (ffs_lu(bitmap)); -#elif LG_SIZEOF_LONG_LONG == 3 - return (ffs_llu(bitmap)); -#else -#error No implementation for 64-bit ffs() -#endif -} - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_u32(uint32_t bitmap) -{ - -#if LG_SIZEOF_INT == 2 - return (ffs_u(bitmap)); -#else -#error No implementation for 32-bit ffs() -#endif - return (ffs_u(bitmap)); -} - -JEMALLOC_INLINE uint64_t -pow2_ceil_u64(uint64_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x |= x >> 32; - x++; - return (x); -} - -JEMALLOC_INLINE uint32_t -pow2_ceil_u32(uint32_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x++; - return (x); -} - -/* Compute the smallest power of 2 that is >= x. */ -JEMALLOC_INLINE size_t -pow2_ceil_zu(size_t x) -{ - -#if (LG_SIZEOF_PTR == 3) - return (pow2_ceil_u64(x)); -#else - return (pow2_ceil_u32(x)); -#endif -} - -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE unsigned -lg_floor(size_t x) -{ - size_t ret; - - assert(x != 0); - - asm ("bsr %1, %0" - : "=r"(ret) // Outputs. - : "r"(x) // Inputs. - ); - assert(ret < UINT_MAX); - return ((unsigned)ret); -} -#elif (defined(_MSC_VER)) -JEMALLOC_INLINE unsigned -lg_floor(size_t x) -{ - unsigned long ret; - - assert(x != 0); - -#if (LG_SIZEOF_PTR == 3) - _BitScanReverse64(&ret, x); -#elif (LG_SIZEOF_PTR == 2) - _BitScanReverse(&ret, x); -#else -# error "Unsupported type size for lg_floor()" -#endif - assert(ret < UINT_MAX); - return ((unsigned)ret); -} -#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) -JEMALLOC_INLINE unsigned -lg_floor(size_t x) -{ - - assert(x != 0); - -#if (LG_SIZEOF_PTR == LG_SIZEOF_INT) - return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x)); -#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) - return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x)); -#else -# error "Unsupported type size for lg_floor()" -#endif -} -#else -JEMALLOC_INLINE unsigned -lg_floor(size_t x) -{ - - assert(x != 0); - - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); -#if (LG_SIZEOF_PTR == 3) - x |= (x >> 32); -#endif - if (x == SIZE_T_MAX) - return ((8 << LG_SIZEOF_PTR) - 1); - x++; - return (ffs_zu(x) - 2); -} -#endif - -/* Set error code. */ -JEMALLOC_INLINE void -set_errno(int errnum) -{ - -#ifdef _WIN32 - SetLastError(errnum); -#else - errno = errnum; -#endif -} - -/* Get last error code. 
*/ -JEMALLOC_INLINE int -get_errno(void) -{ - -#ifdef _WIN32 - return (GetLastError()); -#else - return (errno); -#endif -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/util_externs.h b/include/jemalloc/internal/util_externs.h new file mode 100644 index 0000000..b203b77 --- /dev/null +++ b/include/jemalloc/internal/util_externs.h @@ -0,0 +1,23 @@ +#ifndef JEMALLOC_INTERNAL_UTIL_EXTERNS_H +#define JEMALLOC_INTERNAL_UTIL_EXTERNS_H + +int buferror(int err, char *buf, size_t buflen); +uintmax_t malloc_strtoumax(const char *restrict nptr, + char **restrict endptr, int base); +void malloc_write(const char *s); + +/* + * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating + * point math. + */ +size_t malloc_vsnprintf(char *str, size_t size, const char *format, + va_list ap); +size_t malloc_snprintf(char *str, size_t size, const char *format, ...) + JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap); +void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, + const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); + +#endif /* JEMALLOC_INTERNAL_UTIL_EXTERNS_H */ diff --git a/include/jemalloc/internal/util_inlines.h b/include/jemalloc/internal/util_inlines.h new file mode 100644 index 0000000..93f5b1d --- /dev/null +++ b/include/jemalloc/internal/util_inlines.h @@ -0,0 +1,224 @@ +#ifndef JEMALLOC_INTERNAL_UTIL_INLINES_H +#define JEMALLOC_INTERNAL_UTIL_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +unsigned ffs_llu(unsigned long long bitmap); +unsigned ffs_lu(unsigned long bitmap); +unsigned ffs_u(unsigned bitmap); +unsigned ffs_zu(size_t bitmap); +unsigned ffs_u64(uint64_t bitmap); +unsigned ffs_u32(uint32_t bitmap); +uint64_t pow2_ceil_u64(uint64_t x); +uint32_t pow2_ceil_u32(uint32_t x); +size_t pow2_ceil_zu(size_t x); +unsigned lg_floor(size_t x); +void set_errno(int errnum); +int get_errno(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) + +/* Sanity check. 
*/ +#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ + || !defined(JEMALLOC_INTERNAL_FFS) +# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure +#endif + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_llu(unsigned long long bitmap) +{ + + return (JEMALLOC_INTERNAL_FFSLL(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_lu(unsigned long bitmap) +{ + + return (JEMALLOC_INTERNAL_FFSL(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u(unsigned bitmap) +{ + + return (JEMALLOC_INTERNAL_FFS(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_zu(size_t bitmap) +{ + +#if LG_SIZEOF_PTR == LG_SIZEOF_INT + return (ffs_u(bitmap)); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG + return (ffs_lu(bitmap)); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG + return (ffs_llu(bitmap)); +#else +#error No implementation for size_t ffs() +#endif +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u64(uint64_t bitmap) +{ + +#if LG_SIZEOF_LONG == 3 + return (ffs_lu(bitmap)); +#elif LG_SIZEOF_LONG_LONG == 3 + return (ffs_llu(bitmap)); +#else +#error No implementation for 64-bit ffs() +#endif +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u32(uint32_t bitmap) +{ + +#if LG_SIZEOF_INT == 2 + return (ffs_u(bitmap)); +#else +#error No implementation for 32-bit ffs() +#endif + return (ffs_u(bitmap)); +} + +JEMALLOC_INLINE uint64_t +pow2_ceil_u64(uint64_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x |= x >> 32; + x++; + return (x); +} + +JEMALLOC_INLINE uint32_t +pow2_ceil_u32(uint32_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return (x); +} + +/* Compute the smallest power of 2 that is >= x. */ +JEMALLOC_INLINE size_t +pow2_ceil_zu(size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return (pow2_ceil_u64(x)); +#else + return (pow2_ceil_u32(x)); +#endif +} + +#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE unsigned +lg_floor(size_t x) +{ + size_t ret; + + assert(x != 0); + + asm ("bsr %1, %0" + : "=r"(ret) // Outputs. + : "r"(x) // Inputs. + ); + assert(ret < UINT_MAX); + return ((unsigned)ret); +} +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE unsigned +lg_floor(size_t x) +{ + unsigned long ret; + + assert(x != 0); + +#if (LG_SIZEOF_PTR == 3) + _BitScanReverse64(&ret, x); +#elif (LG_SIZEOF_PTR == 2) + _BitScanReverse(&ret, x); +#else +# error "Unsupported type size for lg_floor()" +#endif + assert(ret < UINT_MAX); + return ((unsigned)ret); +} +#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) +JEMALLOC_INLINE unsigned +lg_floor(size_t x) +{ + + assert(x != 0); + +#if (LG_SIZEOF_PTR == LG_SIZEOF_INT) + return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x)); +#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) + return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x)); +#else +# error "Unsupported type size for lg_floor()" +#endif +} +#else +JEMALLOC_INLINE unsigned +lg_floor(size_t x) +{ + + assert(x != 0); + + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); +#if (LG_SIZEOF_PTR == 3) + x |= (x >> 32); +#endif + if (x == SIZE_T_MAX) + return ((8 << LG_SIZEOF_PTR) - 1); + x++; + return (ffs_zu(x) - 2); +} +#endif + +/* Set error code. */ +JEMALLOC_INLINE void +set_errno(int errnum) +{ + +#ifdef _WIN32 + SetLastError(errnum); +#else + errno = errnum; +#endif +} + +/* Get last error code. 
*/ +JEMALLOC_INLINE int +get_errno(void) +{ + +#ifdef _WIN32 + return (GetLastError()); +#else + return (errno); +#endif +} +#endif + +#endif /* JEMALLOC_INTERNAL_UTIL_INLINES_H */ diff --git a/include/jemalloc/internal/util_types.h b/include/jemalloc/internal/util_types.h new file mode 100644 index 0000000..7f72799 --- /dev/null +++ b/include/jemalloc/internal/util_types.h @@ -0,0 +1,94 @@ +#ifndef JEMALLOC_INTERNAL_UTIL_TYPES_H +#define JEMALLOC_INTERNAL_UTIL_TYPES_H + +#ifdef _WIN32 +# ifdef _WIN64 +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "ll" +# else +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "" +# endif +# define FMTd32 "d" +# define FMTu32 "u" +# define FMTx32 "x" +# define FMTd64 FMT64_PREFIX "d" +# define FMTu64 FMT64_PREFIX "u" +# define FMTx64 FMT64_PREFIX "x" +# define FMTdPTR FMTPTR_PREFIX "d" +# define FMTuPTR FMTPTR_PREFIX "u" +# define FMTxPTR FMTPTR_PREFIX "x" +#else +# include +# define FMTd32 PRId32 +# define FMTu32 PRIu32 +# define FMTx32 PRIx32 +# define FMTd64 PRId64 +# define FMTu64 PRIu64 +# define FMTx64 PRIx64 +# define FMTdPTR PRIdPTR +# define FMTuPTR PRIuPTR +# define FMTxPTR PRIxPTR +#endif + +/* Size of stack-allocated buffer passed to buferror(). */ +#define BUFERROR_BUF 64 + +/* + * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be + * large enough for all possible uses within jemalloc. + */ +#define MALLOC_PRINTF_BUFSIZE 4096 + +/* Junk fill patterns. */ +#ifndef JEMALLOC_ALLOC_JUNK +# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) +#endif +#ifndef JEMALLOC_FREE_JUNK +# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) +#endif + +/* + * Wrap a cpp argument that contains commas such that it isn't broken up into + * multiple arguments. + */ +#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ + +/* cpp macro definition stringification. */ +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) + +/* + * Silence compiler warnings due to uninitialized values. This is used + * wherever the compiler fails to recognize that the variable is never used + * uninitialized. + */ +#ifdef JEMALLOC_CC_SILENCE +# define JEMALLOC_CC_SILENCE_INIT(v) = v +#else +# define JEMALLOC_CC_SILENCE_INIT(v) +#endif + +#ifdef __GNUC__ +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +#else +# define likely(x) !!(x) +# define unlikely(x) !!(x) +#endif + +#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure +#endif + +#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() + +#include "jemalloc/internal/assert.h" + +/* Use to assert a particular configuration, e.g., cassert(config_debug). */ +#define cassert(c) do { \ + if (unlikely(!(c))) \ + not_reached(); \ +} while (0) + +#endif /* JEMALLOC_INTERNAL_UTIL_TYPES_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h deleted file mode 100644 index 86ddb64..0000000 --- a/include/jemalloc/internal/witness.h +++ /dev/null @@ -1,275 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct witness_s witness_t; -typedef unsigned witness_rank_t; -typedef ql_head(witness_t) witness_list_t; -typedef int witness_comp_t (const witness_t *, void *, const witness_t *, - void *); - -/* - * Lock ranks. Witnesses with rank WITNESS_RANK_OMIT are completely ignored by - * the witness machinery. 
- */ -#define WITNESS_RANK_OMIT 0U - -#define WITNESS_RANK_INIT 1U -#define WITNESS_RANK_CTL 1U -#define WITNESS_RANK_ARENAS 2U - -#define WITNESS_RANK_PROF_DUMP 3U -#define WITNESS_RANK_PROF_BT2GCTX 4U -#define WITNESS_RANK_PROF_TDATAS 5U -#define WITNESS_RANK_PROF_TDATA 6U -#define WITNESS_RANK_PROF_GCTX 7U - -#define WITNESS_RANK_ARENA 8U -#define WITNESS_RANK_ARENA_EXTENTS 9U -#define WITNESS_RANK_ARENA_EXTENT_CACHE 10 - -#define WITNESS_RANK_RTREE_ELM 11U -#define WITNESS_RANK_RTREE 12U -#define WITNESS_RANK_BASE 13U - -#define WITNESS_RANK_LEAF 0xffffffffU -#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF -#define WITNESS_RANK_ARENA_LARGE WITNESS_RANK_LEAF -#define WITNESS_RANK_DSS WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF - -#define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct witness_s { - /* Name, used for printing lock order reversal messages. */ - const char *name; - - /* - * Witness rank, where 0 is lowest and UINT_MAX is highest. Witnesses - * must be acquired in order of increasing rank. - */ - witness_rank_t rank; - - /* - * If two witnesses are of equal rank and they have the samp comp - * function pointer, it is called as a last attempt to differentiate - * between witnesses of equal rank. - */ - witness_comp_t *comp; - - /* Opaque data, passed to comp(). */ - void *opaque; - - /* Linkage for thread's currently owned locks. 
*/ - ql_elm(witness_t) link; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void witness_init(witness_t *witness, const char *name, witness_rank_t rank, - witness_comp_t *comp, void *opaque); -#ifdef JEMALLOC_JET -typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); -extern witness_lock_error_t *witness_lock_error; -#else -void witness_lock_error(const witness_list_t *witnesses, - const witness_t *witness); -#endif -#ifdef JEMALLOC_JET -typedef void (witness_owner_error_t)(const witness_t *); -extern witness_owner_error_t *witness_owner_error; -#else -void witness_owner_error(const witness_t *witness); -#endif -#ifdef JEMALLOC_JET -typedef void (witness_not_owner_error_t)(const witness_t *); -extern witness_not_owner_error_t *witness_not_owner_error; -#else -void witness_not_owner_error(const witness_t *witness); -#endif -#ifdef JEMALLOC_JET -typedef void (witness_lockless_error_t)(const witness_list_t *); -extern witness_lockless_error_t *witness_lockless_error; -#else -void witness_lockless_error(const witness_list_t *witnesses); -#endif - -void witnesses_cleanup(tsd_t *tsd); -void witness_prefork(tsd_t *tsd); -void witness_postfork_parent(tsd_t *tsd); -void witness_postfork_child(tsd_t *tsd); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool witness_owner(tsd_t *tsd, const witness_t *witness); -void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); -void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); -void witness_assert_lockless(tsdn_t *tsdn); -void witness_lock(tsdn_t *tsdn, witness_t *witness); -void witness_unlock(tsdn_t *tsdn, witness_t *witness); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) -/* Helper, not intended for direct use. 
*/ -JEMALLOC_INLINE bool -witness_owner(tsd_t *tsd, const witness_t *witness) -{ - witness_list_t *witnesses; - witness_t *w; - - cassert(config_debug); - - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - return (true); - } - - return (false); -} - -JEMALLOC_INLINE void -witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) -{ - tsd_t *tsd; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - if (witness_owner(tsd, witness)) - return; - witness_owner_error(witness); -} - -JEMALLOC_INLINE void -witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - witness_not_owner_error(witness); - } -} - -JEMALLOC_INLINE void -witness_assert_lockless(tsdn_t *tsdn) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - - witnesses = tsd_witnessesp_get(tsd); - w = ql_last(witnesses, link); - if (w != NULL) - witness_lockless_error(witnesses); -} - -JEMALLOC_INLINE void -witness_lock(tsdn_t *tsdn, witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - witness_assert_not_owner(tsdn, witness); - - witnesses = tsd_witnessesp_get(tsd); - w = ql_last(witnesses, link); - if (w == NULL) { - /* No other locks; do nothing. */ - } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { - /* Forking, and relaxed ranking satisfied. */ - } else if (w->rank > witness->rank) { - /* Not forking, rank order reversal. */ - witness_lock_error(witnesses, witness); - } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != - witness->comp || w->comp(w, w->opaque, witness, witness->opaque) > - 0)) { - /* - * Missing/incompatible comparison function, or comparison - * function indicates rank order reversal. - */ - witness_lock_error(witnesses, witness); - } - - ql_elm_new(witness, link); - ql_tail_insert(witnesses, witness, link); -} - -JEMALLOC_INLINE void -witness_unlock(tsdn_t *tsdn, witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - /* - * Check whether owner before removal, rather than relying on - * witness_assert_owner() to abort, so that unit tests can test this - * function's failure mode without causing undefined behavior. 
- */ - if (witness_owner(tsd, witness)) { - witnesses = tsd_witnessesp_get(tsd); - ql_remove(witnesses, witness, link); - } else - witness_assert_owner(tsdn, witness); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/witness_externs.h b/include/jemalloc/internal/witness_externs.h new file mode 100644 index 0000000..dcd987c --- /dev/null +++ b/include/jemalloc/internal/witness_externs.h @@ -0,0 +1,37 @@ +#ifndef JEMALLOC_INTERNAL_WITNESS_EXTERNS_H +#define JEMALLOC_INTERNAL_WITNESS_EXTERNS_H + +void witness_init(witness_t *witness, const char *name, witness_rank_t rank, + witness_comp_t *comp, void *opaque); +#ifdef JEMALLOC_JET +typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); +extern witness_lock_error_t *witness_lock_error; +#else +void witness_lock_error(const witness_list_t *witnesses, + const witness_t *witness); +#endif +#ifdef JEMALLOC_JET +typedef void (witness_owner_error_t)(const witness_t *); +extern witness_owner_error_t *witness_owner_error; +#else +void witness_owner_error(const witness_t *witness); +#endif +#ifdef JEMALLOC_JET +typedef void (witness_not_owner_error_t)(const witness_t *); +extern witness_not_owner_error_t *witness_not_owner_error; +#else +void witness_not_owner_error(const witness_t *witness); +#endif +#ifdef JEMALLOC_JET +typedef void (witness_lockless_error_t)(const witness_list_t *); +extern witness_lockless_error_t *witness_lockless_error; +#else +void witness_lockless_error(const witness_list_t *witnesses); +#endif + +void witnesses_cleanup(tsd_t *tsd); +void witness_prefork(tsd_t *tsd); +void witness_postfork_parent(tsd_t *tsd); +void witness_postfork_child(tsd_t *tsd); + +#endif /* JEMALLOC_INTERNAL_WITNESS_EXTERNS_H */ diff --git a/include/jemalloc/internal/witness_inlines.h b/include/jemalloc/internal/witness_inlines.h new file mode 100644 index 0000000..259aa2e --- /dev/null +++ b/include/jemalloc/internal/witness_inlines.h @@ -0,0 +1,163 @@ +#ifndef JEMALLOC_INTERNAL_WITNESS_INLINES_H +#define JEMALLOC_INTERNAL_WITNESS_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +bool witness_owner(tsd_t *tsd, const witness_t *witness); +void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); +void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); +void witness_assert_lockless(tsdn_t *tsdn); +void witness_lock(tsdn_t *tsdn, witness_t *witness); +void witness_unlock(tsdn_t *tsdn, witness_t *witness); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +/* Helper, not intended for direct use. 
*/ +JEMALLOC_INLINE bool +witness_owner(tsd_t *tsd, const witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + cassert(config_debug); + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + return (true); + } + + return (false); +} + +JEMALLOC_INLINE void +witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) +{ + tsd_t *tsd; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + if (witness_owner(tsd, witness)) + return; + witness_owner_error(witness); +} + +JEMALLOC_INLINE void +witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + witness_not_owner_error(witness); + } +} + +JEMALLOC_INLINE void +witness_assert_lockless(tsdn_t *tsdn) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w != NULL) + witness_lockless_error(witnesses); +} + +JEMALLOC_INLINE void +witness_lock(tsdn_t *tsdn, witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witness_assert_not_owner(tsdn, witness); + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w == NULL) { + /* No other locks; do nothing. */ + } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { + /* Forking, and relaxed ranking satisfied. */ + } else if (w->rank > witness->rank) { + /* Not forking, rank order reversal. */ + witness_lock_error(witnesses, witness); + } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != + witness->comp || w->comp(w, w->opaque, witness, witness->opaque) > + 0)) { + /* + * Missing/incompatible comparison function, or comparison + * function indicates rank order reversal. + */ + witness_lock_error(witnesses, witness); + } + + ql_elm_new(witness, link); + ql_tail_insert(witnesses, witness, link); +} + +JEMALLOC_INLINE void +witness_unlock(tsdn_t *tsdn, witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + /* + * Check whether owner before removal, rather than relying on + * witness_assert_owner() to abort, so that unit tests can test this + * function's failure mode without causing undefined behavior. 
+ */ + if (witness_owner(tsd, witness)) { + witnesses = tsd_witnessesp_get(tsd); + ql_remove(witnesses, witness, link); + } else + witness_assert_owner(tsdn, witness); +} +#endif + +#endif /* JEMALLOC_INTERNAL_WITNESS_INLINES_H */ diff --git a/include/jemalloc/internal/witness_structs.h b/include/jemalloc/internal/witness_structs.h new file mode 100644 index 0000000..95d1970 --- /dev/null +++ b/include/jemalloc/internal/witness_structs.h @@ -0,0 +1,28 @@ +#ifndef JEMALLOC_INTERNAL_WITNESS_STRUCTS_H +#define JEMALLOC_INTERNAL_WITNESS_STRUCTS_H + +struct witness_s { + /* Name, used for printing lock order reversal messages. */ + const char *name; + + /* + * Witness rank, where 0 is lowest and UINT_MAX is highest. Witnesses + * must be acquired in order of increasing rank. + */ + witness_rank_t rank; + + /* + * If two witnesses are of equal rank and they have the same comp + * function pointer, it is called as a last attempt to differentiate + * between witnesses of equal rank. + */ + witness_comp_t *comp; + + /* Opaque data, passed to comp(). */ + void *opaque; + + /* Linkage for thread's currently owned locks. */ + ql_elm(witness_t) link; +}; + +#endif /* JEMALLOC_INTERNAL_WITNESS_STRUCTS_H */ diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h new file mode 100644 index 0000000..ef96282 --- /dev/null +++ b/include/jemalloc/internal/witness_types.h @@ -0,0 +1,46 @@ +#ifndef JEMALLOC_INTERNAL_WITNESS_TYPES_H +#define JEMALLOC_INTERNAL_WITNESS_TYPES_H + +typedef struct witness_s witness_t; +typedef unsigned witness_rank_t; +typedef ql_head(witness_t) witness_list_t; +typedef int witness_comp_t (const witness_t *, void *, const witness_t *, + void *); + +/* + * Lock ranks. Witnesses with rank WITNESS_RANK_OMIT are completely ignored by + * the witness machinery.
+ */ +#define WITNESS_RANK_OMIT 0U + +#define WITNESS_RANK_INIT 1U +#define WITNESS_RANK_CTL 1U +#define WITNESS_RANK_ARENAS 2U + +#define WITNESS_RANK_PROF_DUMP 3U +#define WITNESS_RANK_PROF_BT2GCTX 4U +#define WITNESS_RANK_PROF_TDATAS 5U +#define WITNESS_RANK_PROF_TDATA 6U +#define WITNESS_RANK_PROF_GCTX 7U + +#define WITNESS_RANK_ARENA 8U +#define WITNESS_RANK_ARENA_EXTENTS 9U +#define WITNESS_RANK_ARENA_EXTENT_CACHE 10 + +#define WITNESS_RANK_RTREE_ELM 11U +#define WITNESS_RANK_RTREE 12U +#define WITNESS_RANK_BASE 13U + +#define WITNESS_RANK_LEAF 0xffffffffU +#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF +#define WITNESS_RANK_ARENA_LARGE WITNESS_RANK_LEAF +#define WITNESS_RANK_DSS WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF + +#define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} + +#endif /* JEMALLOC_INTERNAL_WITNESS_TYPES_H */ diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 66485c0..2dd0cde 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -69,18 +69,14 @@ static const bool config_debug = # define JEMALLOC_N(n) @private_namespace@##n # include "jemalloc/internal/private_namespace.h" -# define JEMALLOC_H_TYPES -# define JEMALLOC_H_STRUCTS -# define JEMALLOC_H_EXTERNS -# define JEMALLOC_H_INLINES -# include "jemalloc/internal/nstime.h" -# include "jemalloc/internal/util.h" +# include "jemalloc/internal/nstime_types.h" +# include "jemalloc/internal/nstime_structs.h" +# include "jemalloc/internal/nstime_externs.h" +# include "jemalloc/internal/util_types.h" +# include "jemalloc/internal/util_externs.h" +# include "jemalloc/internal/util_inlines.h" # include "jemalloc/internal/qr.h" # include "jemalloc/internal/ql.h" -# undef JEMALLOC_H_TYPES -# undef JEMALLOC_H_STRUCTS -# undef JEMALLOC_H_EXTERNS -# undef JEMALLOC_H_INLINES /******************************************************************************/ /* -- cgit v0.12
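
Note (not part of the patch): the split above does not change how the witness API is used, only where its pieces live. The sketch below illustrates the typical call pattern for the functions declared in witness_externs.h and witness_inlines.h: a lock wrapper embeds a witness_t, assigns it a rank at initialization, and brackets acquisition and release with witness_lock()/witness_unlock(). The wrapper type and function names (example_mutex_t, example_mutex_lock(), etc.) are hypothetical and exist only for illustration; the witness_*() calls, witness_t, witness_rank_t, and tsdn_t are the jemalloc-internal identifiers declared in this commit. It assumes compilation inside jemalloc's internal build (e.g. after including jemalloc/internal/jemalloc_internal.h), which supplies tsdn_t and the ql macros that the witness headers depend on.

#include <pthread.h>

#include "jemalloc/internal/jemalloc_internal.h"

/*
 * Hypothetical wrapper, for illustration only: pairs a pthread mutex with a
 * witness_t so that lock-order violations are caught in debug builds.
 */
typedef struct {
	pthread_mutex_t	lock;		/* The lock being tracked. */
	witness_t	witness;	/* Lock-order bookkeeping. */
} example_mutex_t;

static void
example_mutex_init(example_mutex_t *m, const char *name, witness_rank_t rank)
{
	pthread_mutex_init(&m->lock, NULL);
	/* NULL comp/opaque: equal-rank acquisition counts as a reversal. */
	witness_init(&m->witness, name, rank, NULL, NULL);
}

static void
example_mutex_lock(tsdn_t *tsdn, example_mutex_t *m)
{
	pthread_mutex_lock(&m->lock);
	/* Aborts (debug builds) if this violates rank order vs. held locks. */
	witness_lock(tsdn, &m->witness);
}

static void
example_mutex_unlock(tsdn_t *tsdn, example_mutex_t *m)
{
	witness_unlock(tsdn, &m->witness);
	pthread_mutex_unlock(&m->lock);
}

In debug builds, acquiring two such locks out of rank order trips witness_lock_error(), and witness_assert_lockless(tsdn) can be placed at public API boundaries to verify that no witnesses are held; the lock/unlock/assert paths are all gated on config_debug, so they compile away in non-debug builds.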