diff options
Diffstat (limited to 'include/jemalloc')
101 files changed, 8073 insertions, 8567 deletions
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h deleted file mode 100644 index 119e3a5..0000000 --- a/include/jemalloc/internal/arena.h +++ /dev/null @@ -1,1528 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) - -/* Maximum number of regions in one run. */ -#define LG_RUN_MAXREGS (LG_PAGE - LG_TINY_MIN) -#define RUN_MAXREGS (1U << LG_RUN_MAXREGS) - -/* - * Minimum redzone size. Redzones may be larger than this if necessary to - * preserve region alignment. - */ -#define REDZONE_MINSIZE 16 - -/* - * The minimum ratio of active:dirty pages per arena is computed as: - * - * (nactive >> lg_dirty_mult) >= ndirty - * - * So, supposing that lg_dirty_mult is 3, there can be no less than 8 times as - * many active pages as dirty pages. - */ -#define LG_DIRTY_MULT_DEFAULT 3 - -typedef enum { - purge_mode_ratio = 0, - purge_mode_decay = 1, - - purge_mode_limit = 2 -} purge_mode_t; -#define PURGE_DEFAULT purge_mode_ratio -/* Default decay time in seconds. */ -#define DECAY_TIME_DEFAULT 10 -/* Number of event ticks between time checks. */ -#define DECAY_NTICKS_PER_UPDATE 1000 - -typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t; -typedef struct arena_avail_links_s arena_avail_links_t; -typedef struct arena_run_s arena_run_t; -typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; -typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; -typedef struct arena_chunk_s arena_chunk_t; -typedef struct arena_bin_info_s arena_bin_info_t; -typedef struct arena_decay_s arena_decay_t; -typedef struct arena_bin_s arena_bin_t; -typedef struct arena_s arena_t; -typedef struct arena_tdata_s arena_tdata_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#ifdef JEMALLOC_ARENA_STRUCTS_A -struct arena_run_s { - /* Index of bin this run is associated with. */ - szind_t binind; - - /* Number of free regions in run. */ - unsigned nfree; - - /* Per region allocated/deallocated bitmap. */ - bitmap_t bitmap[BITMAP_GROUPS_MAX]; -}; - -/* Each element of the chunk map corresponds to one page within the chunk. */ -struct arena_chunk_map_bits_s { - /* - * Run address (or size) and various flags are stored together. The bit - * layout looks like (assuming 32-bit system): - * - * ???????? ???????? ???nnnnn nnndumla - * - * ? : Unallocated: Run address for first/last pages, unset for internal - * pages. - * Small: Run page offset. - * Large: Run page count for first page, unset for trailing pages. - * n : binind for small size class, BININD_INVALID for large size class. - * d : dirty? - * u : unzeroed? - * m : decommitted? - * l : large? - * a : allocated? - * - * Following are example bit patterns for the three types of runs. - * - * p : run page offset - * s : run size - * n : binind for size class; large objects set these to BININD_INVALID - * x : don't care - * - : 0 - * + : 1 - * [DUMLA] : bit set - * [dumla] : bit unset - * - * Unallocated (clean): - * ssssssss ssssssss sss+++++ +++dum-a - * xxxxxxxx xxxxxxxx xxxxxxxx xxx-Uxxx - * ssssssss ssssssss sss+++++ +++dUm-a - * - * Unallocated (dirty): - * ssssssss ssssssss sss+++++ +++D-m-a - * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * ssssssss ssssssss sss+++++ +++D-m-a - * - * Small: - * pppppppp pppppppp pppnnnnn nnnd---A - * pppppppp pppppppp pppnnnnn nnn----A - * pppppppp pppppppp pppnnnnn nnnd---A - * - * Large: - * ssssssss ssssssss sss+++++ +++D--LA - * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * -------- -------- ---+++++ +++D--LA - * - * Large (sampled, size <= LARGE_MINCLASS): - * ssssssss ssssssss sssnnnnn nnnD--LA - * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * -------- -------- ---+++++ +++D--LA - * - * Large (not sampled, size == LARGE_MINCLASS): - * ssssssss ssssssss sss+++++ +++D--LA - * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * -------- -------- ---+++++ +++D--LA - */ - size_t bits; -#define CHUNK_MAP_ALLOCATED ((size_t)0x01U) -#define CHUNK_MAP_LARGE ((size_t)0x02U) -#define CHUNK_MAP_STATE_MASK ((size_t)0x3U) - -#define CHUNK_MAP_DECOMMITTED ((size_t)0x04U) -#define CHUNK_MAP_UNZEROED ((size_t)0x08U) -#define CHUNK_MAP_DIRTY ((size_t)0x10U) -#define CHUNK_MAP_FLAGS_MASK ((size_t)0x1cU) - -#define CHUNK_MAP_BININD_SHIFT 5 -#define BININD_INVALID ((size_t)0xffU) -#define CHUNK_MAP_BININD_MASK (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) -#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK - -#define CHUNK_MAP_RUNIND_SHIFT (CHUNK_MAP_BININD_SHIFT + 8) -#define CHUNK_MAP_SIZE_SHIFT (CHUNK_MAP_RUNIND_SHIFT - LG_PAGE) -#define CHUNK_MAP_SIZE_MASK \ - (~(CHUNK_MAP_BININD_MASK | CHUNK_MAP_FLAGS_MASK | CHUNK_MAP_STATE_MASK)) -}; - -struct arena_runs_dirty_link_s { - qr(arena_runs_dirty_link_t) rd_link; -}; - -/* - * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just - * like arena_chunk_map_bits_t. Two separate arrays are stored within each - * chunk header in order to improve cache locality. - */ -struct arena_chunk_map_misc_s { - /* - * Linkage for run heaps. There are two disjoint uses: - * - * 1) arena_t's runs_avail heaps. - * 2) arena_run_t conceptually uses this linkage for in-use non-full - * runs, rather than directly embedding linkage. - */ - phn(arena_chunk_map_misc_t) ph_link; - - union { - /* Linkage for list of dirty runs. */ - arena_runs_dirty_link_t rd; - - /* Profile counters, used for large object runs. */ - union { - void *prof_tctx_pun; - prof_tctx_t *prof_tctx; - }; - - /* Small region run metadata. */ - arena_run_t run; - }; -}; -typedef ph(arena_chunk_map_misc_t) arena_run_heap_t; -#endif /* JEMALLOC_ARENA_STRUCTS_A */ - -#ifdef JEMALLOC_ARENA_STRUCTS_B -/* Arena chunk header. */ -struct arena_chunk_s { - /* - * A pointer to the arena that owns the chunk is stored within the node. - * This field as a whole is used by chunks_rtree to support both - * ivsalloc() and core-based debugging. - */ - extent_node_t node; - - /* - * True if memory could be backed by transparent huge pages. This is - * only directly relevant to Linux, since it is the only supported - * platform on which jemalloc interacts with explicit transparent huge - * page controls. - */ - bool hugepage; - - /* - * Map of pages within chunk that keeps track of free/large/small. The - * first map_bias entries are omitted, since the chunk header does not - * need to be tracked in the map. This omission saves a header page - * for common chunk sizes (e.g. 4 MiB). - */ - arena_chunk_map_bits_t map_bits[1]; /* Dynamically sized. */ -}; - -/* - * Read-only information associated with each element of arena_t's bins array - * is stored separately, partly to reduce memory usage (only one copy, rather - * than one per arena), but mainly to avoid false cacheline sharing. - * - * Each run has the following layout: - * - * /--------------------\ - * | pad? | - * |--------------------| - * | redzone | - * reg0_offset | region 0 | - * | redzone | - * |--------------------| \ - * | redzone | | - * | region 1 | > reg_interval - * | redzone | / - * |--------------------| - * | ... | - * | ... | - * | ... | - * |--------------------| - * | redzone | - * | region nregs-1 | - * | redzone | - * |--------------------| - * | alignment pad? | - * \--------------------/ - * - * reg_interval has at least the same minimum alignment as reg_size; this - * preserves the alignment constraint that sa2u() depends on. Alignment pad is - * either 0 or redzone_size; it is present only if needed to align reg0_offset. - */ -struct arena_bin_info_s { - /* Size of regions in a run for this bin's size class. */ - size_t reg_size; - - /* Redzone size. */ - size_t redzone_size; - - /* Interval between regions (reg_size + (redzone_size << 1)). */ - size_t reg_interval; - - /* Total size of a run for this bin's size class. */ - size_t run_size; - - /* Total number of regions in a run for this bin's size class. */ - uint32_t nregs; - - /* - * Metadata used to manipulate bitmaps for runs associated with this - * bin. - */ - bitmap_info_t bitmap_info; - - /* Offset of first region in a run for this bin's size class. */ - uint32_t reg0_offset; -}; - -struct arena_decay_s { - /* - * Approximate time in seconds from the creation of a set of unused - * dirty pages until an equivalent set of unused dirty pages is purged - * and/or reused. - */ - ssize_t time; - /* time / SMOOTHSTEP_NSTEPS. */ - nstime_t interval; - /* - * Time at which the current decay interval logically started. We do - * not actually advance to a new epoch until sometime after it starts - * because of scheduling and computation delays, and it is even possible - * to completely skip epochs. In all cases, during epoch advancement we - * merge all relevant activity into the most recently recorded epoch. - */ - nstime_t epoch; - /* Deadline randomness generator. */ - uint64_t jitter_state; - /* - * Deadline for current epoch. This is the sum of interval and per - * epoch jitter which is a uniform random variable in [0..interval). - * Epochs always advance by precise multiples of interval, but we - * randomize the deadline to reduce the likelihood of arenas purging in - * lockstep. - */ - nstime_t deadline; - /* - * Number of dirty pages at beginning of current epoch. During epoch - * advancement we use the delta between arena->decay.ndirty and - * arena->ndirty to determine how many dirty pages, if any, were - * generated. - */ - size_t ndirty; - /* - * Trailing log of how many unused dirty pages were generated during - * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last - * element is the most recent epoch. Corresponding epoch times are - * relative to epoch. - */ - size_t backlog[SMOOTHSTEP_NSTEPS]; -}; - -struct arena_bin_s { - /* - * All operations on runcur, runs, and stats require that lock be - * locked. Run allocation/deallocation are protected by the arena lock, - * which may be acquired while holding one or more bin locks, but not - * vise versa. - */ - malloc_mutex_t lock; - - /* - * Current run being used to service allocations of this bin's size - * class. - */ - arena_run_t *runcur; - - /* - * Heap of non-full runs. This heap is used when looking for an - * existing run when runcur is no longer usable. We choose the - * non-full run that is lowest in memory; this policy tends to keep - * objects packed well, and it can also help reduce the number of - * almost-empty chunks. - */ - arena_run_heap_t runs; - - /* Bin statistics. */ - malloc_bin_stats_t stats; -}; - -struct arena_s { - /* This arena's index within the arenas array. */ - unsigned ind; - - /* - * Number of threads currently assigned to this arena, synchronized via - * atomic operations. Each thread has two distinct assignments, one for - * application-serving allocation, and the other for internal metadata - * allocation. Internal metadata must not be allocated from arenas - * created via the arenas.extend mallctl, because the arena.<i>.reset - * mallctl indiscriminately discards all allocations for the affected - * arena. - * - * 0: Application allocation. - * 1: Internal metadata allocation. - */ - unsigned nthreads[2]; - - /* - * There are three classes of arena operations from a locking - * perspective: - * 1) Thread assignment (modifies nthreads) is synchronized via atomics. - * 2) Bin-related operations are protected by bin locks. - * 3) Chunk- and run-related operations are protected by this mutex. - */ - malloc_mutex_t lock; - - arena_stats_t stats; - /* - * List of tcaches for extant threads associated with this arena. - * Stats from these are merged incrementally, and at exit if - * opt_stats_print is enabled. - */ - ql_head(tcache_t) tcache_ql; - - uint64_t prof_accumbytes; - - /* - * PRNG state for cache index randomization of large allocation base - * pointers. - */ - size_t offset_state; - - dss_prec_t dss_prec; - - /* Extant arena chunks. */ - ql_head(extent_node_t) achunks; - - /* Extent serial number generator state. */ - size_t extent_sn_next; - - /* - * In order to avoid rapid chunk allocation/deallocation when an arena - * oscillates right on the cusp of needing a new chunk, cache the most - * recently freed chunk. The spare is left in the arena's chunk trees - * until it is deleted. - * - * There is one spare chunk per arena, rather than one spare total, in - * order to avoid interactions between multiple threads that could make - * a single spare inadequate. - */ - arena_chunk_t *spare; - - /* Minimum ratio (log base 2) of nactive:ndirty. */ - ssize_t lg_dirty_mult; - - /* True if a thread is currently executing arena_purge_to_limit(). */ - bool purging; - - /* Number of pages in active runs and huge regions. */ - size_t nactive; - - /* - * Current count of pages within unused runs that are potentially - * dirty, and for which madvise(... MADV_DONTNEED) has not been called. - * By tracking this, we can institute a limit on how much dirty unused - * memory is mapped for each arena. - */ - size_t ndirty; - - /* - * Unused dirty memory this arena manages. Dirty memory is conceptually - * tracked as an arbitrarily interleaved LRU of dirty runs and cached - * chunks, but the list linkage is actually semi-duplicated in order to - * avoid extra arena_chunk_map_misc_t space overhead. - * - * LRU-----------------------------------------------------------MRU - * - * /-- arena ---\ - * | | - * | | - * |------------| /- chunk -\ - * ...->|chunks_cache|<--------------------------->| /----\ |<--... - * |------------| | |node| | - * | | | | | | - * | | /- run -\ /- run -\ | | | | - * | | | | | | | | | | - * | | | | | | | | | | - * |------------| |-------| |-------| | |----| | - * ...->|runs_dirty |<-->|rd |<-->|rd |<---->|rd |<----... - * |------------| |-------| |-------| | |----| | - * | | | | | | | | | | - * | | | | | | | \----/ | - * | | \-------/ \-------/ | | - * | | | | - * | | | | - * \------------/ \---------/ - */ - arena_runs_dirty_link_t runs_dirty; - extent_node_t chunks_cache; - - /* Decay-based purging state. */ - arena_decay_t decay; - - /* Extant huge allocations. */ - ql_head(extent_node_t) huge; - /* Synchronizes all huge allocation/update/deallocation. */ - malloc_mutex_t huge_mtx; - - /* - * Trees of chunks that were previously allocated (trees differ only in - * node ordering). These are used when allocating chunks, in an attempt - * to re-use address space. Depending on function, different tree - * orderings are needed, which is why there are two trees with the same - * contents. - */ - extent_tree_t chunks_szsnad_cached; - extent_tree_t chunks_ad_cached; - extent_tree_t chunks_szsnad_retained; - extent_tree_t chunks_ad_retained; - - malloc_mutex_t chunks_mtx; - /* Cache of nodes that were allocated via base_alloc(). */ - ql_head(extent_node_t) node_cache; - malloc_mutex_t node_cache_mtx; - - /* User-configurable chunk hook functions. */ - chunk_hooks_t chunk_hooks; - - /* bins is used to store trees of free regions. */ - arena_bin_t bins[NBINS]; - - /* - * Size-segregated address-ordered heaps of this arena's available runs, - * used for first-best-fit run allocation. Runs are quantized, i.e. - * they reside in the last heap which corresponds to a size class less - * than or equal to the run size. - */ - arena_run_heap_t runs_avail[NPSIZES]; -}; - -/* Used in conjunction with tsd for fast arena-related context lookup. */ -struct arena_tdata_s { - ticker_t decay_ticker; -}; -#endif /* JEMALLOC_ARENA_STRUCTS_B */ - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -static const size_t large_pad = -#ifdef JEMALLOC_CACHE_OBLIVIOUS - PAGE -#else - 0 -#endif - ; - -extern bool opt_thp; -extern purge_mode_t opt_purge; -extern const char *purge_mode_names[]; -extern ssize_t opt_lg_dirty_mult; -extern ssize_t opt_decay_time; - -extern arena_bin_info_t arena_bin_info[NBINS]; - -extern size_t map_bias; /* Number of arena chunk header pages. */ -extern size_t map_misc_offset; -extern size_t arena_maxrun; /* Max run size for arenas. */ -extern size_t large_maxclass; /* Max large size class. */ -extern unsigned nlclasses; /* Number of large size classes. */ -extern unsigned nhclasses; /* Number of huge size classes. */ - -#ifdef JEMALLOC_JET -typedef size_t (run_quantize_t)(size_t); -extern run_quantize_t *run_quantize_floor; -extern run_quantize_t *run_quantize_ceil; -#endif -void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, - bool cache); -void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, - bool cache); -extent_node_t *arena_node_alloc(tsdn_t *tsdn, arena_t *arena); -void arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node); -void *arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, size_t *sn, bool *zero); -void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, - size_t usize, size_t sn); -void arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, - void *chunk, size_t oldsize, size_t usize); -void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, - void *chunk, size_t oldsize, size_t usize, size_t sn); -bool arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, - void *chunk, size_t oldsize, size_t usize, bool *zero); -ssize_t arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena); -bool arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, - ssize_t lg_dirty_mult); -ssize_t arena_decay_time_get(tsdn_t *tsdn, arena_t *arena); -bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time); -void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); -void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena); -void arena_reset(tsd_t *tsd, arena_t *arena); -void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, - tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); -void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, - bool zero); -#ifdef JEMALLOC_JET -typedef void (arena_redzone_corruption_t)(void *, size_t, bool, size_t, - uint8_t); -extern arena_redzone_corruption_t *arena_redzone_corruption; -typedef void (arena_dalloc_junk_small_t)(void *, arena_bin_info_t *); -extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; -#else -void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); -#endif -void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t ind, - bool zero); -void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, - szind_t ind, bool zero); -void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size); -void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t pageind); -#ifdef JEMALLOC_JET -typedef void (arena_dalloc_junk_large_t)(void *, size_t); -extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; -#else -void arena_dalloc_junk_large(void *ptr, size_t usize); -#endif -void arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, void *ptr); -void arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr); -#ifdef JEMALLOC_JET -typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); -extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; -#endif -bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, - size_t size, size_t extra, bool zero); -void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache); -dss_prec_t arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena); -bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec); -ssize_t arena_lg_dirty_mult_default_get(void); -bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); -ssize_t arena_decay_time_default_get(void); -bool arena_decay_time_default_set(ssize_t decay_time); -void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, - unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, - ssize_t *decay_time, size_t *nactive, size_t *ndirty); -void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, - size_t *nactive, size_t *ndirty, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, - malloc_huge_stats_t *hstats); -unsigned arena_nthreads_get(arena_t *arena, bool internal); -void arena_nthreads_inc(arena_t *arena, bool internal); -void arena_nthreads_dec(arena_t *arena, bool internal); -size_t arena_extent_sn_next(arena_t *arena); -arena_t *arena_new(tsdn_t *tsdn, unsigned ind); -void arena_boot(void); -void arena_prefork0(tsdn_t *tsdn, arena_t *arena); -void arena_prefork1(tsdn_t *tsdn, arena_t *arena); -void arena_prefork2(tsdn_t *tsdn, arena_t *arena); -void arena_prefork3(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -arena_chunk_map_bits_t *arena_bitselm_get_mutable(arena_chunk_t *chunk, - size_t pageind); -const arena_chunk_map_bits_t *arena_bitselm_get_const( - const arena_chunk_t *chunk, size_t pageind); -arena_chunk_map_misc_t *arena_miscelm_get_mutable(arena_chunk_t *chunk, - size_t pageind); -const arena_chunk_map_misc_t *arena_miscelm_get_const( - const arena_chunk_t *chunk, size_t pageind); -size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm); -void *arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm); -arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); -arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); -size_t *arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind); -const size_t *arena_mapbitsp_get_const(const arena_chunk_t *chunk, - size_t pageind); -size_t arena_mapbitsp_read(const size_t *mapbitsp); -size_t arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_size_decode(size_t mapbits); -size_t arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, - size_t pageind); -size_t arena_mapbits_large_size_get(const arena_chunk_t *chunk, - size_t pageind); -size_t arena_mapbits_small_runind_get(const arena_chunk_t *chunk, - size_t pageind); -szind_t arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_decommitted_get(const arena_chunk_t *chunk, - size_t pageind); -size_t arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind); -void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits); -size_t arena_mapbits_size_encode(size_t size); -void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, - size_t size, size_t flags); -void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, - size_t size); -void arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, - size_t flags); -void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, - size_t size, size_t flags); -void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - szind_t binind); -void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, - size_t runind, szind_t binind, size_t flags); -void arena_metadata_allocated_add(arena_t *arena, size_t size); -void arena_metadata_allocated_sub(arena_t *arena, size_t size); -size_t arena_metadata_allocated_get(arena_t *arena); -bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); -szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); -szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, - const void *ptr); -prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr); -void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx); -void arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, - const void *old_ptr, prof_tctx_t *old_tctx); -void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); -void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); -void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, - bool zero, tcache_t *tcache, bool slow_path); -arena_t *arena_aalloc(const void *ptr); -size_t arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote); -void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); -void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) -# ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * -arena_bitselm_get_mutable(arena_chunk_t *chunk, size_t pageind) -{ - - assert(pageind >= map_bias); - assert(pageind < chunk_npages); - - return (&chunk->map_bits[pageind-map_bias]); -} - -JEMALLOC_ALWAYS_INLINE const arena_chunk_map_bits_t * -arena_bitselm_get_const(const arena_chunk_t *chunk, size_t pageind) -{ - - return (arena_bitselm_get_mutable((arena_chunk_t *)chunk, pageind)); -} - -JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind) -{ - - assert(pageind >= map_bias); - assert(pageind < chunk_npages); - - return ((arena_chunk_map_misc_t *)((uintptr_t)chunk + - (uintptr_t)map_misc_offset) + pageind-map_bias); -} - -JEMALLOC_ALWAYS_INLINE const arena_chunk_map_misc_t * -arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind) -{ - - return (arena_miscelm_get_mutable((arena_chunk_t *)chunk, pageind)); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm) -{ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); - size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk + - map_misc_offset)) / sizeof(arena_chunk_map_misc_t) + map_bias; - - assert(pageind >= map_bias); - assert(pageind < chunk_npages); - - return (pageind); -} - -JEMALLOC_ALWAYS_INLINE void * -arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm) -{ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); - size_t pageind = arena_miscelm_to_pageind(miscelm); - - return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE))); -} - -JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_rd_to_miscelm(arena_runs_dirty_link_t *rd) -{ - arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t - *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd)); - - assert(arena_miscelm_to_pageind(miscelm) >= map_bias); - assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); - - return (miscelm); -} - -JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_run_to_miscelm(arena_run_t *run) -{ - arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t - *)((uintptr_t)run - offsetof(arena_chunk_map_misc_t, run)); - - assert(arena_miscelm_to_pageind(miscelm) >= map_bias); - assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); - - return (miscelm); -} - -JEMALLOC_ALWAYS_INLINE size_t * -arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind) -{ - - return (&arena_bitselm_get_mutable(chunk, pageind)->bits); -} - -JEMALLOC_ALWAYS_INLINE const size_t * -arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind) -{ - - return (arena_mapbitsp_get_mutable((arena_chunk_t *)chunk, pageind)); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbitsp_read(const size_t *mapbitsp) -{ - - return (*mapbitsp); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind) -{ - - return (arena_mapbitsp_read(arena_mapbitsp_get_const(chunk, pageind))); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_size_decode(size_t mapbits) -{ - size_t size; - -#if CHUNK_MAP_SIZE_SHIFT > 0 - size = (mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT; -#elif CHUNK_MAP_SIZE_SHIFT == 0 - size = mapbits & CHUNK_MAP_SIZE_MASK; -#else - size = (mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT; -#endif - - return (size); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - return (arena_mapbits_size_decode(mapbits)); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_size_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == - (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); - return (arena_mapbits_size_decode(mapbits)); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_small_runind_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == - CHUNK_MAP_ALLOCATED); - return (mapbits >> CHUNK_MAP_RUNIND_SHIFT); -} - -JEMALLOC_ALWAYS_INLINE szind_t -arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - szind_t binind; - - mapbits = arena_mapbits_get(chunk, pageind); - binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; - assert(binind < NBINS || binind == BININD_INVALID); - return (binind); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - return (mapbits & CHUNK_MAP_DIRTY); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - return (mapbits & CHUNK_MAP_UNZEROED); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_decommitted_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - return (mapbits & CHUNK_MAP_DECOMMITTED); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - return (mapbits & CHUNK_MAP_LARGE); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - return (mapbits & CHUNK_MAP_ALLOCATED); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits) -{ - - *mapbitsp = mapbits; -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_size_encode(size_t size) -{ - size_t mapbits; - -#if CHUNK_MAP_SIZE_SHIFT > 0 - mapbits = size << CHUNK_MAP_SIZE_SHIFT; -#elif CHUNK_MAP_SIZE_SHIFT == 0 - mapbits = size; -#else - mapbits = size >> -CHUNK_MAP_SIZE_SHIFT; -#endif - - assert((mapbits & ~CHUNK_MAP_SIZE_MASK) == 0); - return (mapbits); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, - size_t flags) -{ - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); - - assert((size & PAGE_MASK) == 0); - assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); - assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | - CHUNK_MAP_BININD_INVALID | flags); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, - size_t size) -{ - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); - size_t mapbits = arena_mapbitsp_read(mapbitsp); - - assert((size & PAGE_MASK) == 0); - assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | - (mapbits & ~CHUNK_MAP_SIZE_MASK)); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags) -{ - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); - - assert((flags & CHUNK_MAP_UNZEROED) == flags); - arena_mapbitsp_write(mapbitsp, flags); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, - size_t flags) -{ - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); - - assert((size & PAGE_MASK) == 0); - assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); - assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | - CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - szind_t binind) -{ - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); - size_t mapbits = arena_mapbitsp_read(mapbitsp); - - assert(binind <= BININD_INVALID); - assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS + - large_pad); - arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) | - (binind << CHUNK_MAP_BININD_SHIFT)); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, - szind_t binind, size_t flags) -{ - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); - - assert(binind < BININD_INVALID); - assert(pageind - runind >= map_bias); - assert((flags & CHUNK_MAP_UNZEROED) == flags); - arena_mapbitsp_write(mapbitsp, (runind << CHUNK_MAP_RUNIND_SHIFT) | - (binind << CHUNK_MAP_BININD_SHIFT) | flags | CHUNK_MAP_ALLOCATED); -} - -JEMALLOC_INLINE void -arena_metadata_allocated_add(arena_t *arena, size_t size) -{ - - atomic_add_z(&arena->stats.metadata_allocated, size); -} - -JEMALLOC_INLINE void -arena_metadata_allocated_sub(arena_t *arena, size_t size) -{ - - atomic_sub_z(&arena->stats.metadata_allocated, size); -} - -JEMALLOC_INLINE size_t -arena_metadata_allocated_get(arena_t *arena) -{ - - return (atomic_read_z(&arena->stats.metadata_allocated)); -} - -JEMALLOC_INLINE bool -arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - assert(prof_interval != 0); - - arena->prof_accumbytes += accumbytes; - if (arena->prof_accumbytes >= prof_interval) { - arena->prof_accumbytes -= prof_interval; - return (true); - } - return (false); -} - -JEMALLOC_INLINE bool -arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - - if (likely(prof_interval == 0)) - return (false); - return (arena_prof_accum_impl(arena, accumbytes)); -} - -JEMALLOC_INLINE bool -arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - - if (likely(prof_interval == 0)) - return (false); - - { - bool ret; - - malloc_mutex_lock(tsdn, &arena->lock); - ret = arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(tsdn, &arena->lock); - return (ret); - } -} - -JEMALLOC_ALWAYS_INLINE szind_t -arena_ptr_small_binind_get(const void *ptr, size_t mapbits) -{ - szind_t binind; - - binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; - - if (config_debug) { - arena_chunk_t *chunk; - arena_t *arena; - size_t pageind; - size_t actual_mapbits; - size_t rpages_ind; - const arena_run_t *run; - arena_bin_t *bin; - szind_t run_binind, actual_binind; - arena_bin_info_t *bin_info; - const arena_chunk_map_misc_t *miscelm; - const void *rpages; - - assert(binind != BININD_INVALID); - assert(binind < NBINS); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = extent_node_arena_get(&chunk->node); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - actual_mapbits = arena_mapbits_get(chunk, pageind); - assert(mapbits == actual_mapbits); - assert(arena_mapbits_large_get(chunk, pageind) == 0); - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, - pageind); - miscelm = arena_miscelm_get_const(chunk, rpages_ind); - run = &miscelm->run; - run_binind = run->binind; - bin = &arena->bins[run_binind]; - actual_binind = (szind_t)(bin - arena->bins); - assert(run_binind == actual_binind); - bin_info = &arena_bin_info[actual_binind]; - rpages = arena_miscelm_to_rpages(miscelm); - assert(((uintptr_t)ptr - ((uintptr_t)rpages + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval - == 0); - } - - return (binind); -} -# endif /* JEMALLOC_ARENA_INLINE_A */ - -# ifdef JEMALLOC_ARENA_INLINE_B -JEMALLOC_INLINE szind_t -arena_bin_index(arena_t *arena, arena_bin_t *bin) -{ - szind_t binind = (szind_t)(bin - arena->bins); - assert(binind < NBINS); - return (binind); -} - -JEMALLOC_INLINE size_t -arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) -{ - size_t diff, interval, shift, regind; - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - void *rpages = arena_miscelm_to_rpages(miscelm); - - /* - * Freeing a pointer lower than region zero can cause assertion - * failure. - */ - assert((uintptr_t)ptr >= (uintptr_t)rpages + - (uintptr_t)bin_info->reg0_offset); - - /* - * Avoid doing division with a variable divisor if possible. Using - * actual division here can reduce allocator throughput by over 20%! - */ - diff = (size_t)((uintptr_t)ptr - (uintptr_t)rpages - - bin_info->reg0_offset); - - /* Rescale (factor powers of 2 out of the numerator and denominator). */ - interval = bin_info->reg_interval; - shift = ffs_zu(interval) - 1; - diff >>= shift; - interval >>= shift; - - if (interval == 1) { - /* The divisor was a power of 2. */ - regind = diff; - } else { - /* - * To divide by a number D that is not a power of two we - * multiply by (2^21 / D) and then right shift by 21 positions. - * - * X / D - * - * becomes - * - * (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT - * - * We can omit the first three elements, because we never - * divide by 0, and 1 and 2 are both powers of two, which are - * handled above. - */ -#define SIZE_INV_SHIFT ((sizeof(size_t) << 3) - LG_RUN_MAXREGS) -#define SIZE_INV(s) (((ZU(1) << SIZE_INV_SHIFT) / (s)) + 1) - static const size_t interval_invs[] = { - SIZE_INV(3), - SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), - SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), - SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), - SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), - SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), - SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), - SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) - }; - - if (likely(interval <= ((sizeof(interval_invs) / sizeof(size_t)) - + 2))) { - regind = (diff * interval_invs[interval - 3]) >> - SIZE_INV_SHIFT; - } else - regind = diff / interval; -#undef SIZE_INV -#undef SIZE_INV_SHIFT - } - assert(diff == regind * interval); - assert(regind < bin_info->nregs); - - return (regind); -} - -JEMALLOC_INLINE prof_tctx_t * -arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr) -{ - prof_tctx_t *ret; - arena_chunk_t *chunk; - - cassert(config_prof); - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) - ret = (prof_tctx_t *)(uintptr_t)1U; - else { - arena_chunk_map_misc_t *elm = - arena_miscelm_get_mutable(chunk, pageind); - ret = atomic_read_p(&elm->prof_tctx_pun); - } - } else - ret = huge_prof_tctx_get(tsdn, ptr); - - return (ret); -} - -JEMALLOC_INLINE void -arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx) -{ - arena_chunk_t *chunk; - - cassert(config_prof); - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - - if (unlikely(usize > SMALL_MAXCLASS || (uintptr_t)tctx > - (uintptr_t)1U)) { - arena_chunk_map_misc_t *elm; - - assert(arena_mapbits_large_get(chunk, pageind) != 0); - - elm = arena_miscelm_get_mutable(chunk, pageind); - atomic_write_p(&elm->prof_tctx_pun, tctx); - } else { - /* - * tctx must always be initialized for large runs. - * Assert that the surrounding conditional logic is - * equivalent to checking whether ptr refers to a large - * run. - */ - assert(arena_mapbits_large_get(chunk, pageind) == 0); - } - } else - huge_prof_tctx_set(tsdn, ptr, tctx); -} - -JEMALLOC_INLINE void -arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, - const void *old_ptr, prof_tctx_t *old_tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - - if (unlikely(usize > SMALL_MAXCLASS || (ptr == old_ptr && - (uintptr_t)old_tctx > (uintptr_t)1U))) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { - size_t pageind; - arena_chunk_map_misc_t *elm; - - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> - LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != - 0); - assert(arena_mapbits_large_get(chunk, pageind) != 0); - - elm = arena_miscelm_get_mutable(chunk, pageind); - atomic_write_p(&elm->prof_tctx_pun, - (prof_tctx_t *)(uintptr_t)1U); - } else - huge_prof_tctx_reset(tsdn, ptr); - } -} - -JEMALLOC_ALWAYS_INLINE void -arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) -{ - tsd_t *tsd; - ticker_t *decay_ticker; - - if (unlikely(tsdn_null(tsdn))) - return; - tsd = tsdn_tsd(tsdn); - decay_ticker = decay_ticker_get(tsd, arena->ind); - if (unlikely(decay_ticker == NULL)) - return; - if (unlikely(ticker_ticks(decay_ticker, nticks))) - arena_purge(tsdn, arena, false); -} - -JEMALLOC_ALWAYS_INLINE void -arena_decay_tick(tsdn_t *tsdn, arena_t *arena) -{ - - arena_decay_ticks(tsdn, arena, 1); -} - -JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, - tcache_t *tcache, bool slow_path) -{ - - assert(!tsdn_null(tsdn) || tcache == NULL); - assert(size != 0); - - if (likely(tcache != NULL)) { - if (likely(size <= SMALL_MAXCLASS)) { - return (tcache_alloc_small(tsdn_tsd(tsdn), arena, - tcache, size, ind, zero, slow_path)); - } - if (likely(size <= tcache_maxclass)) { - return (tcache_alloc_large(tsdn_tsd(tsdn), arena, - tcache, size, ind, zero, slow_path)); - } - /* (size > tcache_maxclass) case falls through. */ - assert(size > tcache_maxclass); - } - - return (arena_malloc_hard(tsdn, arena, size, ind, zero)); -} - -JEMALLOC_ALWAYS_INLINE arena_t * -arena_aalloc(const void *ptr) -{ - arena_chunk_t *chunk; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) - return (extent_node_arena_get(&chunk->node)); - else - return (huge_aalloc(ptr)); -} - -/* Return the size of the allocation pointed to by ptr. */ -JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote) -{ - size_t ret; - arena_chunk_t *chunk; - size_t pageind; - szind_t binind; - - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - binind = arena_mapbits_binind_get(chunk, pageind); - if (unlikely(binind == BININD_INVALID || (config_prof && !demote - && arena_mapbits_large_get(chunk, pageind) != 0))) { - /* - * Large allocation. In the common case (demote), and - * as this is an inline function, most callers will only - * end up looking at binind to determine that ptr is a - * small allocation. - */ - assert(config_cache_oblivious || ((uintptr_t)ptr & - PAGE_MASK) == 0); - ret = arena_mapbits_large_size_get(chunk, pageind) - - large_pad; - assert(ret != 0); - assert(pageind + ((ret+large_pad)>>LG_PAGE) <= - chunk_npages); - assert(arena_mapbits_dirty_get(chunk, pageind) == - arena_mapbits_dirty_get(chunk, - pageind+((ret+large_pad)>>LG_PAGE)-1)); - } else { - /* - * Small allocation (possibly promoted to a large - * object). - */ - assert(arena_mapbits_large_get(chunk, pageind) != 0 || - arena_ptr_small_binind_get(ptr, - arena_mapbits_get(chunk, pageind)) == binind); - ret = index2size(binind); - } - } else - ret = huge_salloc(tsdn, ptr); - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) -{ - arena_chunk_t *chunk; - size_t pageind, mapbits; - - assert(!tsdn_null(tsdn) || tcache == NULL); - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = arena_mapbits_get(chunk, pageind); - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { - /* Small allocation. */ - if (likely(tcache != NULL)) { - szind_t binind = arena_ptr_small_binind_get(ptr, - mapbits); - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, - binind, slow_path); - } else { - arena_dalloc_small(tsdn, - extent_node_arena_get(&chunk->node), chunk, - ptr, pageind); - } - } else { - size_t size = arena_mapbits_large_size_get(chunk, - pageind); - - assert(config_cache_oblivious || ((uintptr_t)ptr & - PAGE_MASK) == 0); - - if (likely(tcache != NULL) && size - large_pad <= - tcache_maxclass) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - size - large_pad, slow_path); - } else { - arena_dalloc_large(tsdn, - extent_node_arena_get(&chunk->node), chunk, - ptr); - } - } - } else - huge_dalloc(tsdn, ptr); -} - -JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path) -{ - arena_chunk_t *chunk; - - assert(!tsdn_null(tsdn) || tcache == NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { - if (config_prof && opt_prof) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> - LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != - 0); - if (arena_mapbits_large_get(chunk, pageind) != 0) { - /* - * Make sure to use promoted size, not request - * size. - */ - size = arena_mapbits_large_size_get(chunk, - pageind) - large_pad; - } - } - assert(s2u(size) == s2u(arena_salloc(tsdn, ptr, false))); - - if (likely(size <= SMALL_MAXCLASS)) { - /* Small allocation. */ - if (likely(tcache != NULL)) { - szind_t binind = size2index(size); - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, - binind, slow_path); - } else { - size_t pageind = ((uintptr_t)ptr - - (uintptr_t)chunk) >> LG_PAGE; - arena_dalloc_small(tsdn, - extent_node_arena_get(&chunk->node), chunk, - ptr, pageind); - } - } else { - assert(config_cache_oblivious || ((uintptr_t)ptr & - PAGE_MASK) == 0); - - if (likely(tcache != NULL) && size <= tcache_maxclass) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - size, slow_path); - } else { - arena_dalloc_large(tsdn, - extent_node_arena_get(&chunk->node), chunk, - ptr); - } - } - } else - huge_dalloc(tsdn, ptr); -} -# endif /* JEMALLOC_ARENA_INLINE_B */ -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h new file mode 100644 index 0000000..3a85bcb --- /dev/null +++ b/include/jemalloc/internal/arena_externs.h @@ -0,0 +1,96 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#define JEMALLOC_INTERNAL_ARENA_EXTERNS_H + +#include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/pages.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" + +extern ssize_t opt_dirty_decay_ms; +extern ssize_t opt_muzzy_decay_ms; + +extern const arena_bin_info_t arena_bin_info[NBINS]; + +extern percpu_arena_mode_t opt_percpu_arena; +extern const char *percpu_arena_mode_names[]; + +extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; +extern malloc_mutex_t arenas_lock; + +void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, + szind_t szind, uint64_t nrequests); +void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, + size_t size); +void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, + unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, + ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); +void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, + size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, + malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); +void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); +#ifdef JEMALLOC_JET +size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); +#endif +extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, + size_t usize, size_t alignment, bool *zero); +void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, + extent_t *extent); +void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, size_t oldsize); +void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, size_t oldsize); +ssize_t arena_dirty_decay_ms_get(arena_t *arena); +bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); +ssize_t arena_muzzy_decay_ms_get(arena_t *arena); +bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); +void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, + bool all); +void arena_reset(tsd_t *tsd, arena_t *arena); +void arena_destroy(tsd_t *tsd, arena_t *arena); +void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); +void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, + bool zero); + +typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *); +extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small; + +void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, + szind_t ind, bool zero); +void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache); +void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize); +void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + bool slow_path); +void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, void *ptr); +void arena_dalloc_small(tsdn_t *tsdn, void *ptr); +bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero); +void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, + size_t size, size_t alignment, bool zero, tcache_t *tcache); +dss_prec_t arena_dss_prec_get(arena_t *arena); +bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +ssize_t arena_dirty_decay_ms_default_get(void); +bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); +ssize_t arena_muzzy_decay_ms_default_get(void); +bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms); +unsigned arena_nthreads_get(arena_t *arena, bool internal); +void arena_nthreads_inc(arena_t *arena, bool internal); +void arena_nthreads_dec(arena_t *arena, bool internal); +size_t arena_extent_sn_next(arena_t *arena); +arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +void arena_boot(void); +void arena_prefork0(tsdn_t *tsdn, arena_t *arena); +void arena_prefork1(tsdn_t *tsdn, arena_t *arena); +void arena_prefork2(tsdn_t *tsdn, arena_t *arena); +void arena_prefork3(tsdn_t *tsdn, arena_t *arena); +void arena_prefork4(tsdn_t *tsdn, arena_t *arena); +void arena_prefork5(tsdn_t *tsdn, arena_t *arena); +void arena_prefork6(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); + +#endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */ diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h new file mode 100644 index 0000000..da58770 --- /dev/null +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -0,0 +1,57 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_A_H +#define JEMALLOC_INTERNAL_ARENA_INLINES_A_H + +static inline unsigned +arena_ind_get(const arena_t *arena) { + return base_ind_get(arena->base); +} + +static inline void +arena_internal_add(arena_t *arena, size_t size) { + atomic_fetch_add_zu(&arena->stats.internal, size, ATOMIC_RELAXED); +} + +static inline void +arena_internal_sub(arena_t *arena, size_t size) { + atomic_fetch_sub_zu(&arena->stats.internal, size, ATOMIC_RELAXED); +} + +static inline size_t +arena_internal_get(arena_t *arena) { + return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED); +} + +static inline bool +arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { + cassert(config_prof); + + if (likely(prof_interval == 0)) { + return false; + } + + return prof_accum_add(tsdn, &arena->prof_accum, accumbytes); +} + +static inline void +percpu_arena_update(tsd_t *tsd, unsigned cpu) { + assert(have_percpu_arena); + arena_t *oldarena = tsd_arena_get(tsd); + assert(oldarena != NULL); + unsigned oldind = arena_ind_get(oldarena); + + if (oldind != cpu) { + unsigned newind = cpu; + arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true); + assert(newarena != NULL); + + /* Set new arena/tcache associations. */ + arena_migrate(tsd, oldind, newind); + tcache_t *tcache = tcache_get(tsd); + if (tcache != NULL) { + tcache_arena_reassociate(tsd_tsdn(tsd), tcache, + newarena); + } + } +} + +#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_A_H */ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h new file mode 100644 index 0000000..003abe1 --- /dev/null +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -0,0 +1,361 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H +#define JEMALLOC_INTERNAL_ARENA_INLINES_B_H + +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/ticker.h" + +static inline szind_t +arena_bin_index(arena_t *arena, arena_bin_t *bin) { + szind_t binind = (szind_t)(bin - arena->bins); + assert(binind < NBINS); + return binind; +} + +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { + cassert(config_prof); + assert(ptr != NULL); + + /* Static check. */ + if (alloc_ctx == NULL) { + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(!extent_slab_get(extent))) { + return large_prof_tctx_get(tsdn, extent); + } + } else { + if (unlikely(!alloc_ctx->slab)) { + return large_prof_tctx_get(tsdn, iealloc(tsdn, ptr)); + } + } + return (prof_tctx_t *)(uintptr_t)1U; +} + +JEMALLOC_ALWAYS_INLINE void +arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, + alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { + cassert(config_prof); + assert(ptr != NULL); + + /* Static check. */ + if (alloc_ctx == NULL) { + extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(!extent_slab_get(extent))) { + large_prof_tctx_set(tsdn, extent, tctx); + } + } else { + if (unlikely(!alloc_ctx->slab)) { + large_prof_tctx_set(tsdn, iealloc(tsdn, ptr), tctx); + } + } +} + +static inline void +arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { + cassert(config_prof); + assert(ptr != NULL); + + extent_t *extent = iealloc(tsdn, ptr); + assert(!extent_slab_get(extent)); + + large_prof_tctx_reset(tsdn, extent); +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { + tsd_t *tsd; + ticker_t *decay_ticker; + + if (unlikely(tsdn_null(tsdn))) { + return; + } + tsd = tsdn_tsd(tsdn); + decay_ticker = decay_ticker_get(tsd, arena_ind_get(arena)); + if (unlikely(decay_ticker == NULL)) { + return; + } + if (unlikely(ticker_ticks(decay_ticker, nticks))) { + arena_decay(tsdn, arena, false, false); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { + malloc_mutex_assert_not_owner(tsdn, &arena->decay_dirty.mtx); + malloc_mutex_assert_not_owner(tsdn, &arena->decay_muzzy.mtx); + + arena_decay_ticks(tsdn, arena, 1); +} + +JEMALLOC_ALWAYS_INLINE void * +arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(size != 0); + + if (likely(tcache != NULL)) { + if (likely(size <= SMALL_MAXCLASS)) { + return tcache_alloc_small(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path); + } + if (likely(size <= tcache_maxclass)) { + return tcache_alloc_large(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path); + } + /* (size > tcache_maxclass) case falls through. */ + assert(size > tcache_maxclass); + } + + return arena_malloc_hard(tsdn, arena, size, ind, zero); +} + +JEMALLOC_ALWAYS_INLINE arena_t * +arena_aalloc(tsdn_t *tsdn, const void *ptr) { + return extent_arena_get(iealloc(tsdn, ptr)); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_salloc(tsdn_t *tsdn, const void *ptr) { + assert(ptr != NULL); + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + szind_t szind = rtree_szind_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true); + assert(szind != NSIZES); + + return sz_index2size(szind); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_vsalloc(tsdn_t *tsdn, const void *ptr) { + /* + * Return 0 if ptr is not within an extent managed by jemalloc. This + * function has two extra costs relative to isalloc(): + * - The rtree calls cannot claim to be dependent lookups, which induces + * rtree lookup load dependencies. + * - The lookup may fail, so there is an extra branch to check for + * failure. + */ + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *extent; + szind_t szind; + if (rtree_extent_szind_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, false, &extent, &szind)) { + return 0; + } + + if (extent == NULL) { + return 0; + } + assert(extent_state_get(extent) == extent_state_active); + /* Only slab members should be looked up via interior pointers. */ + assert(extent_addr_get(extent) == ptr || extent_slab_get(extent)); + + assert(szind != NSIZES); + + return sz_index2size(szind); +} + +static inline void +arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { + assert(ptr != NULL); + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + szind_t szind; + bool slab; + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, + true, &szind, &slab); + + if (config_debug) { + extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)ptr, true); + assert(szind == extent_szind_get(extent)); + assert(szind < NSIZES); + assert(slab == extent_slab_get(extent)); + } + + if (likely(slab)) { + /* Small allocation. */ + arena_dalloc_small(tsdn, ptr); + } else { + extent_t *extent = iealloc(tsdn, ptr); + large_dalloc(tsdn, extent); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + alloc_ctx_t *alloc_ctx, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + + if (unlikely(tcache == NULL)) { + arena_dalloc_no_tcache(tsdn, ptr); + return; + } + + szind_t szind; + bool slab; + rtree_ctx_t *rtree_ctx; + if (alloc_ctx != NULL) { + szind = alloc_ctx->szind; + slab = alloc_ctx->slab; + assert(szind != NSIZES); + } else { + rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &szind, &slab); + } + + if (config_debug) { + rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); + extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)ptr, true); + assert(szind == extent_szind_get(extent)); + assert(szind < NSIZES); + assert(slab == extent_slab_get(extent)); + } + + if (likely(slab)) { + /* Small allocation. */ + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, + slow_path); + } else { + if (szind < nhbins) { + if (config_prof && unlikely(szind < NBINS)) { + arena_dalloc_promoted(tsdn, ptr, tcache, + slow_path); + } else { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + szind, slow_path); + } + } else { + extent_t *extent = iealloc(tsdn, ptr); + large_dalloc(tsdn, extent); + } + } +} + +static inline void +arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { + assert(ptr != NULL); + assert(size <= LARGE_MAXCLASS); + + szind_t szind; + bool slab; + if (!config_prof || !opt_prof) { + /* + * There is no risk of being confused by a promoted sampled + * object, so base szind and slab on the given size. + */ + szind = sz_size2index(size); + slab = (szind < NBINS); + } + + if ((config_prof && opt_prof) || config_debug) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, + &rtree_ctx_fallback); + + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &szind, &slab); + + assert(szind == sz_size2index(size)); + assert((config_prof && opt_prof) || slab == (szind < NBINS)); + + if (config_debug) { + extent_t *extent = rtree_extent_read(tsdn, + &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); + assert(szind == extent_szind_get(extent)); + assert(slab == extent_slab_get(extent)); + } + } + + if (likely(slab)) { + /* Small allocation. */ + arena_dalloc_small(tsdn, ptr); + } else { + extent_t *extent = iealloc(tsdn, ptr); + large_dalloc(tsdn, extent); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + alloc_ctx_t *alloc_ctx, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + assert(size <= LARGE_MAXCLASS); + + if (unlikely(tcache == NULL)) { + arena_sdalloc_no_tcache(tsdn, ptr, size); + return; + } + + szind_t szind; + bool slab; + UNUSED alloc_ctx_t local_ctx; + if (config_prof && opt_prof) { + if (alloc_ctx == NULL) { + /* Uncommon case and should be a static check. */ + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, + &rtree_ctx_fallback); + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &local_ctx.szind, + &local_ctx.slab); + assert(local_ctx.szind == sz_size2index(size)); + alloc_ctx = &local_ctx; + } + slab = alloc_ctx->slab; + szind = alloc_ctx->szind; + } else { + /* + * There is no risk of being confused by a promoted sampled + * object, so base szind and slab on the given size. + */ + szind = sz_size2index(size); + slab = (szind < NBINS); + } + + if (config_debug) { + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &szind, &slab); + extent_t *extent = rtree_extent_read(tsdn, + &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); + assert(szind == extent_szind_get(extent)); + assert(slab == extent_slab_get(extent)); + } + + if (likely(slab)) { + /* Small allocation. */ + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, + slow_path); + } else { + if (szind < nhbins) { + if (config_prof && unlikely(szind < NBINS)) { + arena_dalloc_promoted(tsdn, ptr, tcache, + slow_path); + } else { + tcache_dalloc_large(tsdn_tsd(tsdn), + tcache, ptr, szind, slow_path); + } + } else { + extent_t *extent = iealloc(tsdn, ptr); + large_dalloc(tsdn, extent); + } + } +} + +#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/include/jemalloc/internal/arena_structs_a.h b/include/jemalloc/internal/arena_structs_a.h new file mode 100644 index 0000000..46aa77c --- /dev/null +++ b/include/jemalloc/internal/arena_structs_a.h @@ -0,0 +1,11 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H +#define JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H + +#include "jemalloc/internal/bitmap.h" + +struct arena_slab_data_s { + /* Per region allocated/deallocated bitmap. */ + bitmap_t bitmap[BITMAP_GROUPS_MAX]; +}; + +#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H */ diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h new file mode 100644 index 0000000..d1fffec --- /dev/null +++ b/include/jemalloc/internal/arena_structs_b.h @@ -0,0 +1,284 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H +#define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ticker.h" + +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each slab has the following layout: + * + * /--------------------\ + * | region 0 | + * |--------------------| + * | region 1 | + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | region nregs-1 | + * \--------------------/ + */ +struct arena_bin_info_s { + /* Size of regions in a slab for this bin's size class. */ + size_t reg_size; + + /* Total size of a slab for this bin's size class. */ + size_t slab_size; + + /* Total number of regions in a slab for this bin's size class. */ + uint32_t nregs; + + /* + * Metadata used to manipulate bitmaps for slabs associated with this + * bin. + */ + bitmap_info_t bitmap_info; +}; + +struct arena_decay_s { + /* Synchronizes all non-atomic fields. */ + malloc_mutex_t mtx; + /* + * True if a thread is currently purging the extents associated with + * this decay structure. + */ + bool purging; + /* + * Approximate time in milliseconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + atomic_zd_t time_ms; + /* time / SMOOTHSTEP_NSTEPS. */ + nstime_t interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t epoch; + /* Deadline randomness generator. */ + uint64_t jitter_state; + /* + * Deadline for current epoch. This is the sum of interval and per + * epoch jitter which is a uniform random variable in [0..interval). + * Epochs always advance by precise multiples of interval, but we + * randomize the deadline to reduce the likelihood of arenas purging in + * lockstep. + */ + nstime_t deadline; + /* + * Number of unpurged pages at beginning of current epoch. During epoch + * advancement we use the delta between arena->decay_*.nunpurged and + * extents_npages_get(&arena->extents_*) to determine how many dirty + * pages, if any, were generated. + */ + size_t nunpurged; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to epoch. + */ + size_t backlog[SMOOTHSTEP_NSTEPS]; + + /* + * Pointer to associated stats. These stats are embedded directly in + * the arena's stats due to how stats structures are shared between the + * arena and ctl code. + * + * Synchronization: Same as associated arena's stats field. */ + decay_stats_t *stats; + /* Peak number of pages in associated extents. Used for debug only. */ + uint64_t ceil_npages; +}; + +struct arena_bin_s { + /* All operations on arena_bin_t fields require lock ownership. */ + malloc_mutex_t lock; + + /* + * Current slab being used to service allocations of this bin's size + * class. slabcur is independent of slabs_{nonfull,full}; whenever + * slabcur is reassigned, the previous slab must be deallocated or + * inserted into slabs_{nonfull,full}. + */ + extent_t *slabcur; + + /* + * Heap of non-full slabs. This heap is used to assure that new + * allocations come from the non-full slab that is oldest/lowest in + * memory. + */ + extent_heap_t slabs_nonfull; + + /* List used to track full slabs. */ + extent_list_t slabs_full; + + /* Bin statistics. */ + malloc_bin_stats_t stats; +}; + +struct arena_s { + /* + * Number of threads currently assigned to this arena. Each thread has + * two distinct assignments, one for application-serving allocation, and + * the other for internal metadata allocation. Internal metadata must + * not be allocated from arenas explicitly created via the arenas.create + * mallctl, because the arena.<i>.reset mallctl indiscriminately + * discards all allocations for the affected arena. + * + * 0: Application allocation. + * 1: Internal metadata allocation. + * + * Synchronization: atomic. + */ + atomic_u_t nthreads[2]; + + /* + * When percpu_arena is enabled, to amortize the cost of reading / + * updating the current CPU id, track the most recent thread accessing + * this arena, and only read CPU if there is a mismatch. + */ + tsdn_t *last_thd; + + /* Synchronization: internal. */ + arena_stats_t stats; + + /* + * List of tcaches for extant threads associated with this arena. + * Stats from these are merged incrementally, and at exit if + * opt_stats_print is enabled. + * + * Synchronization: tcache_ql_mtx. + */ + ql_head(tcache_t) tcache_ql; + malloc_mutex_t tcache_ql_mtx; + + /* Synchronization: internal. */ + prof_accum_t prof_accum; + uint64_t prof_accumbytes; + + /* + * PRNG state for cache index randomization of large allocation base + * pointers. + * + * Synchronization: atomic. + */ + atomic_zu_t offset_state; + + /* + * Extent serial number generator state. + * + * Synchronization: atomic. + */ + atomic_zu_t extent_sn_next; + + /* + * Represents a dss_prec_t, but atomically. + * + * Synchronization: atomic. + */ + atomic_u_t dss_prec; + + /* + * Number of pages in active extents. + * + * Synchronization: atomic. + */ + atomic_zu_t nactive; + + /* + * Extant large allocations. + * + * Synchronization: large_mtx. + */ + extent_list_t large; + /* Synchronizes all large allocation/update/deallocation. */ + malloc_mutex_t large_mtx; + + /* + * Collections of extents that were previously allocated. These are + * used when allocating extents, in an attempt to re-use address space. + * + * Synchronization: internal. + */ + extents_t extents_dirty; + extents_t extents_muzzy; + extents_t extents_retained; + + /* + * Decay-based purging state, responsible for scheduling extent state + * transitions. + * + * Synchronization: internal. + */ + arena_decay_t decay_dirty; /* dirty --> muzzy */ + arena_decay_t decay_muzzy; /* muzzy --> retained */ + + /* + * Next extent size class in a growing series to use when satisfying a + * request via the extent hooks (only if opt_retain). This limits the + * number of disjoint virtual memory ranges so that extent merging can + * be effective even if multiple arenas' extent allocation requests are + * highly interleaved. + * + * Synchronization: extent_grow_mtx + */ + pszind_t extent_grow_next; + malloc_mutex_t extent_grow_mtx; + + /* + * Available extent structures that were allocated via + * base_alloc_extent(). + * + * Synchronization: extent_avail_mtx. + */ + extent_tree_t extent_avail; + malloc_mutex_t extent_avail_mtx; + + /* + * bins is used to store heaps of free regions. + * + * Synchronization: internal. + */ + arena_bin_t bins[NBINS]; + + /* + * Base allocator, from which arena metadata are allocated. + * + * Synchronization: internal. + */ + base_t *base; + /* Used to determine uptime. Read-only after initialization. */ + nstime_t create_time; +}; + +/* Used in conjunction with tsd for fast arena-related context lookup. */ +struct arena_tdata_s { + ticker_t decay_ticker; +}; + +/* Used to pass rtree lookup context down the path. */ +struct alloc_ctx_s { + szind_t szind; + bool slab; +}; + +#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h new file mode 100644 index 0000000..a691bd8 --- /dev/null +++ b/include/jemalloc/internal/arena_types.h @@ -0,0 +1,45 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H +#define JEMALLOC_INTERNAL_ARENA_TYPES_H + +/* Maximum number of regions in one slab. */ +#define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) +#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) + +/* Default decay times in milliseconds. */ +#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) +#define MUZZY_DECAY_MS_DEFAULT ZD(10 * 1000) +/* Number of event ticks between time checks. */ +#define DECAY_NTICKS_PER_UPDATE 1000 + +typedef struct arena_slab_data_s arena_slab_data_t; +typedef struct arena_bin_info_s arena_bin_info_t; +typedef struct arena_decay_s arena_decay_t; +typedef struct arena_bin_s arena_bin_t; +typedef struct arena_s arena_t; +typedef struct arena_tdata_s arena_tdata_t; +typedef struct alloc_ctx_s alloc_ctx_t; + +typedef enum { + percpu_arena_mode_names_base = 0, /* Used for options processing. */ + + /* + * *_uninit are used only during bootstrapping, and must correspond + * to initialized variant plus percpu_arena_mode_enabled_base. + */ + percpu_arena_uninit = 0, + per_phycpu_arena_uninit = 1, + + /* All non-disabled modes must come after percpu_arena_disabled. */ + percpu_arena_disabled = 2, + + percpu_arena_mode_names_limit = 3, /* Used for options processing. */ + percpu_arena_mode_enabled_base = 3, + + percpu_arena = 3, + per_phycpu_arena = 4 /* Hyper threads share arena. */ +} percpu_arena_mode_t; + +#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) +#define PERCPU_ARENA_DEFAULT percpu_arena_disabled + +#endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/include/jemalloc/internal/assert.h b/include/jemalloc/internal/assert.h index 6f8f7eb..be4d45b 100644 --- a/include/jemalloc/internal/assert.h +++ b/include/jemalloc/internal/assert.h @@ -1,9 +1,12 @@ +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/util.h" + /* * Define a custom assert() in order to reduce the chances of deadlock during * assertion failure. */ #ifndef assert -#define assert(e) do { \ +#define assert(e) do { \ if (unlikely(config_debug && !(e))) { \ malloc_printf( \ "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \ @@ -14,7 +17,7 @@ #endif #ifndef not_reached -#define not_reached() do { \ +#define not_reached() do { \ if (config_debug) { \ malloc_printf( \ "<jemalloc>: %s:%d: Unreachable code reached\n", \ @@ -26,7 +29,7 @@ #endif #ifndef not_implemented -#define not_implemented() do { \ +#define not_implemented() do { \ if (config_debug) { \ malloc_printf("<jemalloc>: %s:%d: Not implemented\n", \ __FILE__, __LINE__); \ @@ -36,10 +39,18 @@ #endif #ifndef assert_not_implemented -#define assert_not_implemented(e) do { \ - if (unlikely(config_debug && !(e))) \ +#define assert_not_implemented(e) do { \ + if (unlikely(config_debug && !(e))) { \ not_implemented(); \ + } \ } while (0) #endif - +/* Use to assert a particular configuration, e.g., cassert(config_debug). */ +#ifndef cassert +#define cassert(c) do { \ + if (unlikely(!(c))) { \ + not_reached(); \ + } \ +} while (0) +#endif diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 3f15ea1..adadb1a 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -1,651 +1,77 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#define atomic_read_uint64(p) atomic_add_uint64(p, 0) -#define atomic_read_uint32(p) atomic_add_uint32(p, 0) -#define atomic_read_p(p) atomic_add_p(p, NULL) -#define atomic_read_z(p) atomic_add_z(p, 0) -#define atomic_read_u(p) atomic_add_u(p, 0) - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_INTERNAL_ATOMIC_H +#define JEMALLOC_INTERNAL_ATOMIC_H + +#define ATOMIC_INLINE static inline + +#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) +# include "jemalloc/internal/atomic_gcc_atomic.h" +#elif defined(JEMALLOC_GCC_SYNC_ATOMICS) +# include "jemalloc/internal/atomic_gcc_sync.h" +#elif defined(_MSC_VER) +# include "jemalloc/internal/atomic_msvc.h" +#elif defined(JEMALLOC_C11_ATOMICS) +# include "jemalloc/internal/atomic_c11.h" +#else +# error "Don't have atomics implemented on this platform." +#endif /* - * All arithmetic functions return the arithmetic result of the atomic - * operation. Some atomic operation APIs return the value prior to mutation, in - * which case the following functions must redundantly compute the result so - * that it can be returned. These functions are normally inlined, so the extra - * operations can be optimized away if the return values aren't used by the - * callers. + * This header gives more or less a backport of C11 atomics. The user can write + * JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_sizeof_type); to generate + * counterparts of the C11 atomic functions for type, as so: + * JEMALLOC_GENERATE_ATOMICS(int *, pi, 3); + * and then write things like: + * int *some_ptr; + * atomic_pi_t atomic_ptr_to_int; + * atomic_store_pi(&atomic_ptr_to_int, some_ptr, ATOMIC_RELAXED); + * int *prev_value = atomic_exchange_pi(&ptr_to_int, NULL, ATOMIC_ACQ_REL); + * assert(some_ptr == prev_value); + * and expect things to work in the obvious way. * - * <t> atomic_read_<t>(<t> *p) { return (*p); } - * <t> atomic_add_<t>(<t> *p, <t> x) { return (*p += x); } - * <t> atomic_sub_<t>(<t> *p, <t> x) { return (*p -= x); } - * bool atomic_cas_<t>(<t> *p, <t> c, <t> s) - * { - * if (*p != c) - * return (true); - * *p = s; - * return (false); - * } - * void atomic_write_<t>(<t> *p, <t> x) { *p = x; } + * Also included (with naming differences to avoid conflicts with the standard + * library): + * atomic_fence(atomic_memory_order_t) (mimics C11's atomic_thread_fence). + * ATOMIC_INIT (mimics C11's ATOMIC_VAR_INIT). */ -#ifndef JEMALLOC_ENABLE_INLINE -uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); -uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); -bool atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s); -void atomic_write_uint64(uint64_t *p, uint64_t x); -uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); -uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); -bool atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s); -void atomic_write_uint32(uint32_t *p, uint32_t x); -void *atomic_add_p(void **p, void *x); -void *atomic_sub_p(void **p, void *x); -bool atomic_cas_p(void **p, void *c, void *s); -void atomic_write_p(void **p, const void *x); -size_t atomic_add_z(size_t *p, size_t x); -size_t atomic_sub_z(size_t *p, size_t x); -bool atomic_cas_z(size_t *p, size_t c, size_t s); -void atomic_write_z(size_t *p, size_t x); -unsigned atomic_add_u(unsigned *p, unsigned x); -unsigned atomic_sub_u(unsigned *p, unsigned x); -bool atomic_cas_u(unsigned *p, unsigned c, unsigned s); -void atomic_write_u(unsigned *p, unsigned x); -#endif +/* + * Pure convenience, so that we don't have to type "atomic_memory_order_" + * quite so often. + */ +#define ATOMIC_RELAXED atomic_memory_order_relaxed +#define ATOMIC_ACQUIRE atomic_memory_order_acquire +#define ATOMIC_RELEASE atomic_memory_order_release +#define ATOMIC_ACQ_REL atomic_memory_order_acq_rel +#define ATOMIC_SEQ_CST atomic_memory_order_seq_cst -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) -/******************************************************************************/ -/* 64-bit operations. */ +/* + * Not all platforms have 64-bit atomics. If we do, this #define exposes that + * fact. + */ #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# if (defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - uint64_t t = x; - - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (t + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - uint64_t t; - - x = (uint64_t)(-(int64_t)x); - t = x; - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (t + x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) -{ - uint8_t success; - - asm volatile ( - "lock; cmpxchgq %4, %0;" - "sete %1;" - : "=m" (*p), "=a" (success) /* Outputs. */ - : "m" (*p), "a" (c), "r" (s) /* Inputs. */ - : "memory" /* Clobbers. */ - ); - - return (!(bool)success); -} - -JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) -{ - - asm volatile ( - "xchgq %1, %0;" /* Lock is implied by xchgq. */ - : "=m" (*p), "+r" (x) /* Outputs. */ - : "m" (*p) /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -# elif (defined(JEMALLOC_C11ATOMICS)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return (atomic_fetch_add(a, x) + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return (atomic_fetch_sub(a, x) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) -{ - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return (!atomic_compare_exchange_strong(a, &c, s)); -} - -JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) -{ - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - atomic_store(a, x); -} -# elif (defined(JEMALLOC_ATOMIC9)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - /* - * atomic_fetchadd_64() doesn't exist, but we only ever use this - * function on LP64 systems, so atomic_fetchadd_long() will do. - */ - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return (atomic_fetchadd_long(p, (unsigned long)x) + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) -{ - - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return (!atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s)); -} - -JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) -{ - - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - atomic_store_rel_long(p, x); -} -# elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); -} - -JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) -{ - - return (!OSAtomicCompareAndSwap64(c, s, (int64_t *)p)); -} - -JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) -{ - uint64_t o; - - /*The documented OSAtomic*() API does not expose an atomic exchange. */ - do { - o = atomic_read_uint64(p); - } while (atomic_cas_uint64(p, o, x)); -} -# elif (defined(_MSC_VER)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (InterlockedExchangeAdd64(p, x) + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) -{ - uint64_t o; - - o = InterlockedCompareExchange64(p, s, c); - return (o != c); -} - -JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) -{ - - InterlockedExchange64(p, x); -} -# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ - defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} - -JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) -{ - - return (!__sync_bool_compare_and_swap(p, c, s)); -} - -JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) -{ - - __sync_lock_test_and_set(p, x); -} -# else -# error "Missing implementation for 64-bit atomic operations" -# endif +# define JEMALLOC_ATOMIC_U64 #endif -/******************************************************************************/ -/* 32-bit operations. */ -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - uint32_t t = x; - - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (t + x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - uint32_t t; - - x = (uint32_t)(-(int32_t)x); - t = x; - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (t + x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) -{ - uint8_t success; - - asm volatile ( - "lock; cmpxchgl %4, %0;" - "sete %1;" - : "=m" (*p), "=a" (success) /* Outputs. */ - : "m" (*p), "a" (c), "r" (s) /* Inputs. */ - : "memory" - ); - - return (!(bool)success); -} - -JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) -{ - - asm volatile ( - "xchgl %1, %0;" /* Lock is implied by xchgl. */ - : "=m" (*p), "+r" (x) /* Outputs. */ - : "m" (*p) /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -# elif (defined(JEMALLOC_C11ATOMICS)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return (atomic_fetch_add(a, x) + x); -} +JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR) -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return (atomic_fetch_sub(a, x) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) -{ - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return (!atomic_compare_exchange_strong(a, &c, s)); -} - -JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) -{ - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - atomic_store(a, x); -} -#elif (defined(JEMALLOC_ATOMIC9)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (atomic_fetchadd_32(p, x) + x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) -{ - - return (!atomic_cmpset_32(p, c, s)); -} - -JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) -{ - - atomic_store_rel_32(p, x); -} -#elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); -} - -JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) -{ - - return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p)); -} - -JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) -{ - uint32_t o; - - /*The documented OSAtomic*() API does not expose an atomic exchange. */ - do { - o = atomic_read_uint32(p); - } while (atomic_cas_uint32(p, o, x)); -} -#elif (defined(_MSC_VER)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (InterlockedExchangeAdd(p, x) + x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) -{ - uint32_t o; - - o = InterlockedCompareExchange(p, s, c); - return (o != c); -} - -JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) -{ - - InterlockedExchange(p, x); -} -#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ - defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} - -JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) -{ - - return (!__sync_bool_compare_and_swap(p, c, s)); -} - -JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) -{ - - __sync_lock_test_and_set(p, x); -} -#else -# error "Missing implementation for 32-bit atomic operations" -#endif - -/******************************************************************************/ -/* Pointer operations. */ -JEMALLOC_INLINE void * -atomic_add_p(void **p, void *x) -{ - -#if (LG_SIZEOF_PTR == 3) - return ((void *)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); -#elif (LG_SIZEOF_PTR == 2) - return ((void *)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); -#endif -} - -JEMALLOC_INLINE void * -atomic_sub_p(void **p, void *x) -{ - -#if (LG_SIZEOF_PTR == 3) - return ((void *)atomic_add_uint64((uint64_t *)p, - (uint64_t)-((int64_t)x))); -#elif (LG_SIZEOF_PTR == 2) - return ((void *)atomic_add_uint32((uint32_t *)p, - (uint32_t)-((int32_t)x))); -#endif -} - -JEMALLOC_INLINE bool -atomic_cas_p(void **p, void *c, void *s) -{ - -#if (LG_SIZEOF_PTR == 3) - return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); -#elif (LG_SIZEOF_PTR == 2) - return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); -#endif -} - -JEMALLOC_INLINE void -atomic_write_p(void **p, const void *x) -{ - -#if (LG_SIZEOF_PTR == 3) - atomic_write_uint64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - atomic_write_uint32((uint32_t *)p, (uint32_t)x); -#endif -} - -/******************************************************************************/ -/* size_t operations. */ -JEMALLOC_INLINE size_t -atomic_add_z(size_t *p, size_t x) -{ - -#if (LG_SIZEOF_PTR == 3) - return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); -#elif (LG_SIZEOF_PTR == 2) - return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); -#endif -} - -JEMALLOC_INLINE size_t -atomic_sub_z(size_t *p, size_t x) -{ - -#if (LG_SIZEOF_PTR == 3) - return ((size_t)atomic_add_uint64((uint64_t *)p, - (uint64_t)-((int64_t)x))); -#elif (LG_SIZEOF_PTR == 2) - return ((size_t)atomic_add_uint32((uint32_t *)p, - (uint32_t)-((int32_t)x))); -#endif -} - -JEMALLOC_INLINE bool -atomic_cas_z(size_t *p, size_t c, size_t s) -{ - -#if (LG_SIZEOF_PTR == 3) - return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); -#elif (LG_SIZEOF_PTR == 2) - return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); -#endif -} - -JEMALLOC_INLINE void -atomic_write_z(size_t *p, size_t x) -{ - -#if (LG_SIZEOF_PTR == 3) - atomic_write_uint64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - atomic_write_uint32((uint32_t *)p, (uint32_t)x); -#endif -} - -/******************************************************************************/ -/* unsigned operations. */ -JEMALLOC_INLINE unsigned -atomic_add_u(unsigned *p, unsigned x) -{ - -#if (LG_SIZEOF_INT == 3) - return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); -#elif (LG_SIZEOF_INT == 2) - return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); -#endif -} - -JEMALLOC_INLINE unsigned -atomic_sub_u(unsigned *p, unsigned x) -{ +/* + * There's no actual guarantee that sizeof(bool) == 1, but it's true on the only + * platform that actually needs to know the size, MSVC. + */ +JEMALLOC_GENERATE_ATOMICS(bool, b, 0) -#if (LG_SIZEOF_INT == 3) - return ((unsigned)atomic_add_uint64((uint64_t *)p, - (uint64_t)-((int64_t)x))); -#elif (LG_SIZEOF_INT == 2) - return ((unsigned)atomic_add_uint32((uint32_t *)p, - (uint32_t)-((int32_t)x))); -#endif -} +JEMALLOC_GENERATE_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT) -JEMALLOC_INLINE bool -atomic_cas_u(unsigned *p, unsigned c, unsigned s) -{ +JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR) -#if (LG_SIZEOF_INT == 3) - return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); -#elif (LG_SIZEOF_INT == 2) - return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); -#endif -} +JEMALLOC_GENERATE_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR) -JEMALLOC_INLINE void -atomic_write_u(unsigned *p, unsigned x) -{ +JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2) -#if (LG_SIZEOF_INT == 3) - atomic_write_uint64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_INT == 2) - atomic_write_uint32((uint32_t *)p, (uint32_t)x); +#ifdef JEMALLOC_ATOMIC_U64 +JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3) #endif -} -/******************************************************************************/ -#endif +#undef ATOMIC_INLINE -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_ATOMIC_H */ diff --git a/include/jemalloc/internal/atomic_c11.h b/include/jemalloc/internal/atomic_c11.h new file mode 100644 index 0000000..a5f9313 --- /dev/null +++ b/include/jemalloc/internal/atomic_c11.h @@ -0,0 +1,97 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_C11_H +#define JEMALLOC_INTERNAL_ATOMIC_C11_H + +#include <stdatomic.h> + +#define ATOMIC_INIT(...) ATOMIC_VAR_INIT(__VA_ARGS__) + +#define atomic_memory_order_t memory_order +#define atomic_memory_order_relaxed memory_order_relaxed +#define atomic_memory_order_acquire memory_order_acquire +#define atomic_memory_order_release memory_order_release +#define atomic_memory_order_acq_rel memory_order_acq_rel +#define atomic_memory_order_seq_cst memory_order_seq_cst + +#define atomic_fence atomic_thread_fence + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +typedef _Atomic(type) atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + /* \ + * A strict interpretation of the C standard prevents \ + * atomic_load from taking a const argument, but it's \ + * convenient for our purposes. This cast is a workaround. \ + */ \ + atomic_##short_type##_t* a_nonconst = \ + (atomic_##short_type##_t*)a; \ + return atomic_load_explicit(a_nonconst, mo); \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + atomic_store_explicit(a, val, mo); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return atomic_exchange_explicit(a, val, mo); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return atomic_compare_exchange_weak_explicit(a, expected, \ + desired, success_mo, failure_mo); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return atomic_compare_exchange_strong_explicit(a, expected, \ + desired, success_mo, failure_mo); \ +} + +/* + * Integral types have some special operations available that non-integral ones + * lack. + */ +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_add_explicit(a, val, mo); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_sub_explicit(a, val, mo); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_and_explicit(a, val, mo); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_or_explicit(a, val, mo); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_xor_explicit(a, val, mo); \ +} + +#endif /* JEMALLOC_INTERNAL_ATOMIC_C11_H */ diff --git a/include/jemalloc/internal/atomic_gcc_atomic.h b/include/jemalloc/internal/atomic_gcc_atomic.h new file mode 100644 index 0000000..6b73a14 --- /dev/null +++ b/include/jemalloc/internal/atomic_gcc_atomic.h @@ -0,0 +1,127 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H +#define JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H + +#include "jemalloc/internal/assert.h" + +#define ATOMIC_INIT(...) {__VA_ARGS__} + +typedef enum { + atomic_memory_order_relaxed, + atomic_memory_order_acquire, + atomic_memory_order_release, + atomic_memory_order_acq_rel, + atomic_memory_order_seq_cst +} atomic_memory_order_t; + +ATOMIC_INLINE int +atomic_enum_to_builtin(atomic_memory_order_t mo) { + switch (mo) { + case atomic_memory_order_relaxed: + return __ATOMIC_RELAXED; + case atomic_memory_order_acquire: + return __ATOMIC_ACQUIRE; + case atomic_memory_order_release: + return __ATOMIC_RELEASE; + case atomic_memory_order_acq_rel: + return __ATOMIC_ACQ_REL; + case atomic_memory_order_seq_cst: + return __ATOMIC_SEQ_CST; + } + /* Can't happen; the switch is exhaustive. */ + not_reached(); +} + +ATOMIC_INLINE void +atomic_fence(atomic_memory_order_t mo) { + __atomic_thread_fence(atomic_enum_to_builtin(mo)); +} + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +typedef struct { \ + type repr; \ +} atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + type result; \ + __atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo)); \ + return result; \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + __atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + type result; \ + __atomic_exchange(&a->repr, &val, &result, \ + atomic_enum_to_builtin(mo)); \ + return result; \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return __atomic_compare_exchange(&a->repr, expected, &desired, \ + true, atomic_enum_to_builtin(success_mo), \ + atomic_enum_to_builtin(failure_mo)); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return __atomic_compare_exchange(&a->repr, expected, &desired, \ + false, \ + atomic_enum_to_builtin(success_mo), \ + atomic_enum_to_builtin(failure_mo)); \ +} + + +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_add(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_sub(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_and(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_or(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_xor(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} + +#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H */ diff --git a/include/jemalloc/internal/atomic_gcc_sync.h b/include/jemalloc/internal/atomic_gcc_sync.h new file mode 100644 index 0000000..30846e4 --- /dev/null +++ b/include/jemalloc/internal/atomic_gcc_sync.h @@ -0,0 +1,191 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H +#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H + +#define ATOMIC_INIT(...) {__VA_ARGS__} + +typedef enum { + atomic_memory_order_relaxed, + atomic_memory_order_acquire, + atomic_memory_order_release, + atomic_memory_order_acq_rel, + atomic_memory_order_seq_cst +} atomic_memory_order_t; + +ATOMIC_INLINE void +atomic_fence(atomic_memory_order_t mo) { + /* Easy cases first: no barrier, and full barrier. */ + if (mo == atomic_memory_order_relaxed) { + asm volatile("" ::: "memory"); + return; + } + if (mo == atomic_memory_order_seq_cst) { + asm volatile("" ::: "memory"); + __sync_synchronize(); + asm volatile("" ::: "memory"); + return; + } + asm volatile("" ::: "memory"); +# if defined(__i386__) || defined(__x86_64__) + /* This is implicit on x86. */ +# elif defined(__ppc__) + asm volatile("lwsync"); +# elif defined(__sparc__) && defined(__arch64__) + if (mo == atomic_memory_order_acquire) { + asm volatile("membar #LoadLoad | #LoadStore"); + } else if (mo == atomic_memory_order_release) { + asm volatile("membar #LoadStore | #StoreStore"); + } else { + asm volatile("membar #LoadLoad | #LoadStore | #StoreStore"); + } +# else + __sync_synchronize(); +# endif + asm volatile("" ::: "memory"); +} + +/* + * A correct implementation of seq_cst loads and stores on weakly ordered + * architectures could do either of the following: + * 1. store() is weak-fence -> store -> strong fence, load() is load -> + * strong-fence. + * 2. store() is strong-fence -> store, load() is strong-fence -> load -> + * weak-fence. + * The tricky thing is, load() and store() above can be the load or store + * portions of a gcc __sync builtin, so we have to follow GCC's lead, which + * means going with strategy 2. + * On strongly ordered architectures, the natural strategy is to stick a strong + * fence after seq_cst stores, and have naked loads. So we want the strong + * fences in different places on different architectures. + * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to + * accomplish this. + */ + +ATOMIC_INLINE void +atomic_pre_sc_load_fence() { +# if defined(__i386__) || defined(__x86_64__) || \ + (defined(__sparc__) && defined(__arch64__)) + atomic_fence(atomic_memory_order_relaxed); +# else + atomic_fence(atomic_memory_order_seq_cst); +# endif +} + +ATOMIC_INLINE void +atomic_post_sc_store_fence() { +# if defined(__i386__) || defined(__x86_64__) || \ + (defined(__sparc__) && defined(__arch64__)) + atomic_fence(atomic_memory_order_seq_cst); +# else + atomic_fence(atomic_memory_order_relaxed); +# endif + +} + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +typedef struct { \ + type volatile repr; \ +} atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + if (mo == atomic_memory_order_seq_cst) { \ + atomic_pre_sc_load_fence(); \ + } \ + type result = a->repr; \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_acquire); \ + } \ + return result; \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_release); \ + } \ + a->repr = val; \ + if (mo == atomic_memory_order_seq_cst) { \ + atomic_post_sc_store_fence(); \ + } \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + /* \ + * Because of FreeBSD, we care about gcc 4.2, which doesn't have\ + * an atomic exchange builtin. We fake it with a CAS loop. \ + */ \ + while (true) { \ + type old = a->repr; \ + if (__sync_bool_compare_and_swap(&a->repr, old, val)) { \ + return old; \ + } \ + } \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + type prev = __sync_val_compare_and_swap(&a->repr, *expected, \ + desired); \ + if (prev == *expected) { \ + return true; \ + } else { \ + *expected = prev; \ + return false; \ + } \ +} \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + type prev = __sync_val_compare_and_swap(&a->repr, *expected, \ + desired); \ + if (prev == *expected) { \ + return true; \ + } else { \ + *expected = prev; \ + return false; \ + } \ +} + +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_add(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_sub(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_and(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_or(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_xor(&a->repr, val); \ +} + +#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */ diff --git a/include/jemalloc/internal/atomic_msvc.h b/include/jemalloc/internal/atomic_msvc.h new file mode 100644 index 0000000..67057ce --- /dev/null +++ b/include/jemalloc/internal/atomic_msvc.h @@ -0,0 +1,158 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_MSVC_H +#define JEMALLOC_INTERNAL_ATOMIC_MSVC_H + +#define ATOMIC_INIT(...) {__VA_ARGS__} + +typedef enum { + atomic_memory_order_relaxed, + atomic_memory_order_acquire, + atomic_memory_order_release, + atomic_memory_order_acq_rel, + atomic_memory_order_seq_cst +} atomic_memory_order_t; + +typedef char atomic_repr_0_t; +typedef short atomic_repr_1_t; +typedef long atomic_repr_2_t; +typedef __int64 atomic_repr_3_t; + +ATOMIC_INLINE void +atomic_fence(atomic_memory_order_t mo) { + _ReadWriteBarrier(); +# if defined(_M_ARM) || defined(_M_ARM64) + /* ARM needs a barrier for everything but relaxed. */ + if (mo != atomic_memory_order_relaxed) { + MemoryBarrier(); + } +# elif defined(_M_IX86) || defined (_M_X64) + /* x86 needs a barrier only for seq_cst. */ + if (mo == atomic_memory_order_seq_cst) { + MemoryBarrier(); + } +# else +# error "Don't know how to create atomics for this platform for MSVC." +# endif + _ReadWriteBarrier(); +} + +#define ATOMIC_INTERLOCKED_REPR(lg_size) atomic_repr_ ## lg_size ## _t + +#define ATOMIC_CONCAT(a, b) ATOMIC_RAW_CONCAT(a, b) +#define ATOMIC_RAW_CONCAT(a, b) a ## b + +#define ATOMIC_INTERLOCKED_NAME(base_name, lg_size) ATOMIC_CONCAT( \ + base_name, ATOMIC_INTERLOCKED_SUFFIX(lg_size)) + +#define ATOMIC_INTERLOCKED_SUFFIX(lg_size) \ + ATOMIC_CONCAT(ATOMIC_INTERLOCKED_SUFFIX_, lg_size) + +#define ATOMIC_INTERLOCKED_SUFFIX_0 8 +#define ATOMIC_INTERLOCKED_SUFFIX_1 16 +#define ATOMIC_INTERLOCKED_SUFFIX_2 +#define ATOMIC_INTERLOCKED_SUFFIX_3 64 + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \ +typedef struct { \ + ATOMIC_INTERLOCKED_REPR(lg_size) repr; \ +} atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + ATOMIC_INTERLOCKED_REPR(lg_size) ret = a->repr; \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_acquire); \ + } \ + return (type) ret; \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_release); \ + } \ + a->repr = (ATOMIC_INTERLOCKED_REPR(lg_size)) val; \ + if (mo == atomic_memory_order_seq_cst) { \ + atomic_fence(atomic_memory_order_seq_cst); \ + } \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchange, \ + lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + ATOMIC_INTERLOCKED_REPR(lg_size) e = \ + (ATOMIC_INTERLOCKED_REPR(lg_size))*expected; \ + ATOMIC_INTERLOCKED_REPR(lg_size) d = \ + (ATOMIC_INTERLOCKED_REPR(lg_size))desired; \ + ATOMIC_INTERLOCKED_REPR(lg_size) old = \ + ATOMIC_INTERLOCKED_NAME(_InterlockedCompareExchange, \ + lg_size)(&a->repr, d, e); \ + if (old == e) { \ + return true; \ + } else { \ + *expected = (type)old; \ + return false; \ + } \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + /* We implement the weak version with strong semantics. */ \ + return atomic_compare_exchange_weak_##short_type(a, expected, \ + desired, success_mo, failure_mo); \ +} + + +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchangeAdd, \ + lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + /* \ + * MSVC warns on negation of unsigned operands, but for us it \ + * gives exactly the right semantics (MAX_TYPE + 1 - operand). \ + */ \ + __pragma(warning(push)) \ + __pragma(warning(disable: 4146)) \ + return atomic_fetch_add_##short_type(a, -val, mo); \ + __pragma(warning(pop)) \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedAnd, lg_size)( \ + &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedOr, lg_size)( \ + &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedXor, lg_size)( \ + &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} + +#endif /* JEMALLOC_INTERNAL_ATOMIC_MSVC_H */ diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h new file mode 100644 index 0000000..7c88369 --- /dev/null +++ b/include/jemalloc/internal/background_thread_externs.h @@ -0,0 +1,30 @@ +#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H +#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H + +extern bool opt_background_thread; +extern malloc_mutex_t background_thread_lock; +extern atomic_b_t background_thread_enabled_state; +extern size_t n_background_threads; +extern background_thread_info_t *background_thread_info; + +bool background_thread_create(tsd_t *tsd, unsigned arena_ind); +bool background_threads_enable(tsd_t *tsd); +bool background_threads_disable(tsd_t *tsd); +void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, + arena_decay_t *decay, size_t npages_new); +void background_thread_prefork0(tsdn_t *tsdn); +void background_thread_prefork1(tsdn_t *tsdn); +void background_thread_postfork_parent(tsdn_t *tsdn); +void background_thread_postfork_child(tsdn_t *tsdn); +bool background_thread_stats_read(tsdn_t *tsdn, + background_thread_stats_t *stats); +void background_thread_ctl_init(tsdn_t *tsdn); + +#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER +extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *, + void *(*)(void *), void *__restrict); +#endif +bool background_thread_boot0(void); +bool background_thread_boot1(tsdn_t *tsdn); + +#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */ diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h new file mode 100644 index 0000000..fd5095f --- /dev/null +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -0,0 +1,56 @@ +#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H +#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H + +JEMALLOC_ALWAYS_INLINE bool +background_thread_enabled(void) { + return atomic_load_b(&background_thread_enabled_state, ATOMIC_RELAXED); +} + +JEMALLOC_ALWAYS_INLINE void +background_thread_enabled_set(tsdn_t *tsdn, bool state) { + malloc_mutex_assert_owner(tsdn, &background_thread_lock); + atomic_store_b(&background_thread_enabled_state, state, ATOMIC_RELAXED); +} + +JEMALLOC_ALWAYS_INLINE background_thread_info_t * +arena_background_thread_info_get(arena_t *arena) { + unsigned arena_ind = arena_ind_get(arena); + return &background_thread_info[arena_ind % ncpus]; +} + +JEMALLOC_ALWAYS_INLINE uint64_t +background_thread_wakeup_time_get(background_thread_info_t *info) { + uint64_t next_wakeup = nstime_ns(&info->next_wakeup); + assert(atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE) == + (next_wakeup == BACKGROUND_THREAD_INDEFINITE_SLEEP)); + return next_wakeup; +} + +JEMALLOC_ALWAYS_INLINE void +background_thread_wakeup_time_set(tsdn_t *tsdn, background_thread_info_t *info, + uint64_t wakeup_time) { + malloc_mutex_assert_owner(tsdn, &info->mtx); + atomic_store_b(&info->indefinite_sleep, + wakeup_time == BACKGROUND_THREAD_INDEFINITE_SLEEP, ATOMIC_RELEASE); + nstime_init(&info->next_wakeup, wakeup_time); +} + +JEMALLOC_ALWAYS_INLINE bool +background_thread_indefinite_sleep(background_thread_info_t *info) { + return atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE); +} + +JEMALLOC_ALWAYS_INLINE void +arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena) { + if (!background_thread_enabled()) { + return; + } + background_thread_info_t *info = + arena_background_thread_info_get(arena); + if (background_thread_indefinite_sleep(info)) { + background_thread_interval_check(tsdn, arena, + &arena->decay_dirty, 0); + } +} + +#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H */ diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h new file mode 100644 index 0000000..e69a7d0 --- /dev/null +++ b/include/jemalloc/internal/background_thread_structs.h @@ -0,0 +1,52 @@ +#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H +#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H + +/* This file really combines "structs" and "types", but only transitionally. */ + +#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) +# define JEMALLOC_PTHREAD_CREATE_WRAPPER +#endif + +#define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX + +typedef enum { + background_thread_stopped, + background_thread_started, + /* Thread waits on the global lock when paused (for arena_reset). */ + background_thread_paused, +} background_thread_state_t; + +struct background_thread_info_s { +#ifdef JEMALLOC_BACKGROUND_THREAD + /* Background thread is pthread specific. */ + pthread_t thread; + pthread_cond_t cond; +#endif + malloc_mutex_t mtx; + background_thread_state_t state; + /* When true, it means no wakeup scheduled. */ + atomic_b_t indefinite_sleep; + /* Next scheduled wakeup time (absolute time in ns). */ + nstime_t next_wakeup; + /* + * Since the last background thread run, newly added number of pages + * that need to be purged by the next wakeup. This is adjusted on + * epoch advance, and is used to determine whether we should signal the + * background thread to wake up earlier. + */ + size_t npages_to_purge_new; + /* Stats: total number of runs since started. */ + uint64_t tot_n_runs; + /* Stats: total sleep time since started. */ + nstime_t tot_sleep_time; +}; +typedef struct background_thread_info_s background_thread_info_t; + +struct background_thread_stats_s { + size_t num_threads; + uint64_t num_runs; + nstime_t run_interval; +}; +typedef struct background_thread_stats_s background_thread_stats_t; + +#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H */ diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h deleted file mode 100644 index d6b81e1..0000000 --- a/include/jemalloc/internal/base.h +++ /dev/null @@ -1,25 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *base_alloc(tsdn_t *tsdn, size_t size); -void base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident, - size_t *mapped); -bool base_boot(void); -void base_prefork(tsdn_t *tsdn); -void base_postfork_parent(tsdn_t *tsdn); -void base_postfork_child(tsdn_t *tsdn); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h new file mode 100644 index 0000000..0a1114f --- /dev/null +++ b/include/jemalloc/internal/base_externs.h @@ -0,0 +1,19 @@ +#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H +#define JEMALLOC_INTERNAL_BASE_EXTERNS_H + +base_t *b0get(void); +base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +void base_delete(base_t *base); +extent_hooks_t *base_extent_hooks_get(base_t *base); +extent_hooks_t *base_extent_hooks_set(base_t *base, + extent_hooks_t *extent_hooks); +void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); +extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base); +void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, + size_t *resident, size_t *mapped); +void base_prefork(tsdn_t *tsdn, base_t *base); +void base_postfork_parent(tsdn_t *tsdn, base_t *base); +void base_postfork_child(tsdn_t *tsdn, base_t *base); +bool base_boot(tsdn_t *tsdn); + +#endif /* JEMALLOC_INTERNAL_BASE_EXTERNS_H */ diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h new file mode 100644 index 0000000..931560b --- /dev/null +++ b/include/jemalloc/internal/base_inlines.h @@ -0,0 +1,9 @@ +#ifndef JEMALLOC_INTERNAL_BASE_INLINES_H +#define JEMALLOC_INTERNAL_BASE_INLINES_H + +static inline unsigned +base_ind_get(const base_t *base) { + return base->ind; +} + +#endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */ diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h new file mode 100644 index 0000000..18e227b --- /dev/null +++ b/include/jemalloc/internal/base_structs.h @@ -0,0 +1,55 @@ +#ifndef JEMALLOC_INTERNAL_BASE_STRUCTS_H +#define JEMALLOC_INTERNAL_BASE_STRUCTS_H + +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/size_classes.h" + +/* Embedded at the beginning of every block of base-managed virtual memory. */ +struct base_block_s { + /* Total size of block's virtual memory mapping. */ + size_t size; + + /* Next block in list of base's blocks. */ + base_block_t *next; + + /* Tracks unused trailing space. */ + extent_t extent; +}; + +struct base_s { + /* Associated arena's index within the arenas array. */ + unsigned ind; + + /* + * User-configurable extent hook functions. Points to an + * extent_hooks_t. + */ + atomic_p_t extent_hooks; + + /* Protects base_alloc() and base_stats_get() operations. */ + malloc_mutex_t mtx; + + /* + * Most recent size class in the series of increasingly large base + * extents. Logarithmic spacing between subsequent allocations ensures + * that the total number of distinct mappings remains small. + */ + pszind_t pind_last; + + /* Serial number generation state. */ + size_t extent_sn_next; + + /* Chain of all blocks associated with base. */ + base_block_t *blocks; + + /* Heap of extents that track unused trailing space within blocks. */ + extent_heap_t avail[NSIZES]; + + /* Stats, only maintained if config_stats. */ + size_t allocated; + size_t resident; + size_t mapped; +}; + +#endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */ diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h new file mode 100644 index 0000000..be7ee82 --- /dev/null +++ b/include/jemalloc/internal/base_types.h @@ -0,0 +1,7 @@ +#ifndef JEMALLOC_INTERNAL_BASE_TYPES_H +#define JEMALLOC_INTERNAL_BASE_TYPES_H + +typedef struct base_block_s base_block_t; +typedef struct base_s base_t; + +#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */ diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h new file mode 100644 index 0000000..8d078a8 --- /dev/null +++ b/include/jemalloc/internal/bit_util.h @@ -0,0 +1,165 @@ +#ifndef JEMALLOC_INTERNAL_BIT_UTIL_H +#define JEMALLOC_INTERNAL_BIT_UTIL_H + +#include "jemalloc/internal/assert.h" + +#define BIT_UTIL_INLINE static inline + +/* Sanity check. */ +#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ + || !defined(JEMALLOC_INTERNAL_FFS) +# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure +#endif + + +BIT_UTIL_INLINE unsigned +ffs_llu(unsigned long long bitmap) { + return JEMALLOC_INTERNAL_FFSLL(bitmap); +} + +BIT_UTIL_INLINE unsigned +ffs_lu(unsigned long bitmap) { + return JEMALLOC_INTERNAL_FFSL(bitmap); +} + +BIT_UTIL_INLINE unsigned +ffs_u(unsigned bitmap) { + return JEMALLOC_INTERNAL_FFS(bitmap); +} + +BIT_UTIL_INLINE unsigned +ffs_zu(size_t bitmap) { +#if LG_SIZEOF_PTR == LG_SIZEOF_INT + return ffs_u(bitmap); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG + return ffs_lu(bitmap); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG + return ffs_llu(bitmap); +#else +#error No implementation for size_t ffs() +#endif +} + +BIT_UTIL_INLINE unsigned +ffs_u64(uint64_t bitmap) { +#if LG_SIZEOF_LONG == 3 + return ffs_lu(bitmap); +#elif LG_SIZEOF_LONG_LONG == 3 + return ffs_llu(bitmap); +#else +#error No implementation for 64-bit ffs() +#endif +} + +BIT_UTIL_INLINE unsigned +ffs_u32(uint32_t bitmap) { +#if LG_SIZEOF_INT == 2 + return ffs_u(bitmap); +#else +#error No implementation for 32-bit ffs() +#endif + return ffs_u(bitmap); +} + +BIT_UTIL_INLINE uint64_t +pow2_ceil_u64(uint64_t x) { + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x |= x >> 32; + x++; + return x; +} + +BIT_UTIL_INLINE uint32_t +pow2_ceil_u32(uint32_t x) { + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return x; +} + +/* Compute the smallest power of 2 that is >= x. */ +BIT_UTIL_INLINE size_t +pow2_ceil_zu(size_t x) { +#if (LG_SIZEOF_PTR == 3) + return pow2_ceil_u64(x); +#else + return pow2_ceil_u32(x); +#endif +} + +#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +BIT_UTIL_INLINE unsigned +lg_floor(size_t x) { + size_t ret; + assert(x != 0); + + asm ("bsr %1, %0" + : "=r"(ret) // Outputs. + : "r"(x) // Inputs. + ); + assert(ret < UINT_MAX); + return (unsigned)ret; +} +#elif (defined(_MSC_VER)) +BIT_UTIL_INLINE unsigned +lg_floor(size_t x) { + unsigned long ret; + + assert(x != 0); + +#if (LG_SIZEOF_PTR == 3) + _BitScanReverse64(&ret, x); +#elif (LG_SIZEOF_PTR == 2) + _BitScanReverse(&ret, x); +#else +# error "Unsupported type size for lg_floor()" +#endif + assert(ret < UINT_MAX); + return (unsigned)ret; +} +#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) +BIT_UTIL_INLINE unsigned +lg_floor(size_t x) { + assert(x != 0); + +#if (LG_SIZEOF_PTR == LG_SIZEOF_INT) + return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x); +#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) + return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x); +#else +# error "Unsupported type size for lg_floor()" +#endif +} +#else +BIT_UTIL_INLINE unsigned +lg_floor(size_t x) { + assert(x != 0); + + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); +#if (LG_SIZEOF_PTR == 3) + x |= (x >> 32); +#endif + if (x == SIZE_T_MAX) { + return (8 << LG_SIZEOF_PTR) - 1; + } + x++; + return ffs_zu(x) - 2; +} +#endif + +#undef BIT_UTIL_INLINE + +#endif /* JEMALLOC_INTERNAL_BIT_UTIL_H */ diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 36f38b5..ac99029 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -1,19 +1,27 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_BITMAP_H +#define JEMALLOC_INTERNAL_BITMAP_H -/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ -#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS -#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) +#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/size_classes.h" -typedef struct bitmap_level_s bitmap_level_t; -typedef struct bitmap_info_s bitmap_info_t; typedef unsigned long bitmap_t; -#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG +#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG + +/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ +#if LG_SLAB_MAXREGS > LG_CEIL_NSIZES +/* Maximum bitmap bit count is determined by maximum regions per slab. */ +# define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS +#else +/* Maximum bitmap bit count is determined by number of extent size classes. */ +# define LG_BITMAP_MAXBITS LG_CEIL_NSIZES +#endif +#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) /* Number of bits per group. */ -#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) -#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) -#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) +#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) +#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) /* * Do some analysis on how big the bitmap is before we use a tree. For a brute @@ -21,81 +29,131 @@ typedef unsigned long bitmap_t; * use a tree instead. */ #if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 -# define USE_TREE +# define BITMAP_USE_TREE #endif /* Number of groups required to store a given number of bits. */ -#define BITMAP_BITS2GROUPS(nbits) \ - ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) +#define BITMAP_BITS2GROUPS(nbits) \ + (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) /* * Number of groups required at a particular level for a given number of bits. */ -#define BITMAP_GROUPS_L0(nbits) \ +#define BITMAP_GROUPS_L0(nbits) \ BITMAP_BITS2GROUPS(nbits) -#define BITMAP_GROUPS_L1(nbits) \ +#define BITMAP_GROUPS_L1(nbits) \ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) -#define BITMAP_GROUPS_L2(nbits) \ +#define BITMAP_GROUPS_L2(nbits) \ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) -#define BITMAP_GROUPS_L3(nbits) \ +#define BITMAP_GROUPS_L3(nbits) \ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ BITMAP_BITS2GROUPS((nbits))))) +#define BITMAP_GROUPS_L4(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) /* * Assuming the number of levels, number of groups required for a given number * of bits. */ -#define BITMAP_GROUPS_1_LEVEL(nbits) \ +#define BITMAP_GROUPS_1_LEVEL(nbits) \ BITMAP_GROUPS_L0(nbits) -#define BITMAP_GROUPS_2_LEVEL(nbits) \ +#define BITMAP_GROUPS_2_LEVEL(nbits) \ (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) -#define BITMAP_GROUPS_3_LEVEL(nbits) \ +#define BITMAP_GROUPS_3_LEVEL(nbits) \ (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) -#define BITMAP_GROUPS_4_LEVEL(nbits) \ +#define BITMAP_GROUPS_4_LEVEL(nbits) \ (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) +#define BITMAP_GROUPS_5_LEVEL(nbits) \ + (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) /* * Maximum number of groups required to support LG_BITMAP_MAXBITS. */ -#ifdef USE_TREE +#ifdef BITMAP_USE_TREE #if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_1_LEVEL(nbits) # define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_2_LEVEL(nbits) # define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_3_LEVEL(nbits) # define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_4_LEVEL(nbits) # define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_5_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) #else # error "Unsupported bitmap size" #endif -/* Maximum number of levels possible. */ -#define BITMAP_MAX_LEVELS \ - (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ - + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) +/* + * Maximum number of levels possible. This could be statically computed based + * on LG_BITMAP_MAXBITS: + * + * #define BITMAP_MAX_LEVELS \ + * (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + * + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) + * + * However, that would not allow the generic BITMAP_INFO_INITIALIZER() macro, so + * instead hardcode BITMAP_MAX_LEVELS to the largest number supported by the + * various cascading macros. The only additional cost this incurs is some + * unused trailing entries in bitmap_info_t structures; the bitmaps themselves + * are not impacted. + */ +#define BITMAP_MAX_LEVELS 5 + +#define BITMAP_INFO_INITIALIZER(nbits) { \ + /* nbits. */ \ + nbits, \ + /* nlevels. */ \ + (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \ + (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \ + (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \ + (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \ + /* levels. */ \ + { \ + {0}, \ + {BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \ + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \ + BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \ + BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits)} \ + } \ +} -#else /* USE_TREE */ +#else /* BITMAP_USE_TREE */ -#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) +#define BITMAP_GROUPS(nbits) BITMAP_BITS2GROUPS(nbits) +#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) -#endif /* USE_TREE */ +#define BITMAP_INFO_INITIALIZER(nbits) { \ + /* nbits. */ \ + nbits, \ + /* ngroups. */ \ + BITMAP_BITS2GROUPS(nbits) \ +} -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS +#endif /* BITMAP_USE_TREE */ -struct bitmap_level_s { +typedef struct bitmap_level_s { /* Offset of this level's groups within the array of groups. */ size_t group_offset; -}; +} bitmap_level_t; -struct bitmap_info_s { +typedef struct bitmap_info_s { /* Logical number of bits in bitmap (stored at bottom level). */ size_t nbits; -#ifdef USE_TREE +#ifdef BITMAP_USE_TREE /* Number of levels necessary for nbits. */ unsigned nlevels; @@ -104,37 +162,19 @@ struct bitmap_info_s { * bottom to top (e.g. the bottom level is stored in levels[0]). */ bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; -#else /* USE_TREE */ +#else /* BITMAP_USE_TREE */ /* Number of groups necessary for nbits. */ size_t ngroups; -#endif /* USE_TREE */ -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); -size_t bitmap_size(const bitmap_info_t *binfo); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); -bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); -void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -#endif +#endif /* BITMAP_USE_TREE */ +} bitmap_info_t; + +void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); +void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill); +size_t bitmap_size(const bitmap_info_t *binfo); -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) -JEMALLOC_INLINE bool -bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ -#ifdef USE_TREE +static inline bool +bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { +#ifdef BITMAP_USE_TREE size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; bitmap_t rg = bitmap[rgoff]; /* The bitmap is full iff the root group is 0. */ @@ -143,28 +183,27 @@ bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) size_t i; for (i = 0; i < binfo->ngroups; i++) { - if (bitmap[i] != 0) - return (false); + if (bitmap[i] != 0) { + return false; + } } - return (true); + return true; #endif } -JEMALLOC_INLINE bool -bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ +static inline bool +bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { size_t goff; bitmap_t g; assert(bit < binfo->nbits); goff = bit >> LG_BITMAP_GROUP_NBITS; g = bitmap[goff]; - return (!(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)))); + return !(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); } -JEMALLOC_INLINE void -bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ +static inline void +bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { size_t goff; bitmap_t *gp; bitmap_t g; @@ -178,7 +217,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(bitmap_get(bitmap, binfo, bit)); -#ifdef USE_TREE +#ifdef BITMAP_USE_TREE /* Propagate group state transitions up the tree. */ if (g == 0) { unsigned i; @@ -190,24 +229,83 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; - if (g != 0) + if (g != 0) { break; + } } } #endif } +/* ffu: find first unset >= bit. */ +static inline size_t +bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { + assert(min_bit < binfo->nbits); + +#ifdef BITMAP_USE_TREE + size_t bit = 0; + for (unsigned level = binfo->nlevels; level--;) { + size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level + + 1)); + bitmap_t group = bitmap[binfo->levels[level].group_offset + (bit + >> lg_bits_per_group)]; + unsigned group_nmask = (unsigned)(((min_bit > bit) ? (min_bit - + bit) : 0) >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS)); + assert(group_nmask <= BITMAP_GROUP_NBITS); + bitmap_t group_mask = ~((1LU << group_nmask) - 1); + bitmap_t group_masked = group & group_mask; + if (group_masked == 0LU) { + if (group == 0LU) { + return binfo->nbits; + } + /* + * min_bit was preceded by one or more unset bits in + * this group, but there are no other unset bits in this + * group. Try again starting at the first bit of the + * next sibling. This will recurse at most once per + * non-root level. + */ + size_t sib_base = bit + (ZU(1) << lg_bits_per_group); + assert(sib_base > min_bit); + assert(sib_base > bit); + if (sib_base >= binfo->nbits) { + return binfo->nbits; + } + return bitmap_ffu(bitmap, binfo, sib_base); + } + bit += ((size_t)(ffs_lu(group_masked) - 1)) << + (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); + } + assert(bit >= min_bit); + assert(bit < binfo->nbits); + return bit; +#else + size_t i = min_bit >> LG_BITMAP_GROUP_NBITS; + bitmap_t g = bitmap[i] & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) + - 1); + size_t bit; + do { + bit = ffs_lu(g); + if (bit != 0) { + return (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); + } + i++; + g = bitmap[i]; + } while (i < binfo->ngroups); + return binfo->nbits; +#endif +} + /* sfu: set first unset. */ -JEMALLOC_INLINE size_t -bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ +static inline size_t +bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { size_t bit; bitmap_t g; unsigned i; assert(!bitmap_full(bitmap, binfo)); -#ifdef USE_TREE +#ifdef BITMAP_USE_TREE i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; bit = ffs_lu(g) - 1; @@ -226,12 +324,11 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); #endif bitmap_set(bitmap, binfo, bit); - return (bit); + return bit; } -JEMALLOC_INLINE void -bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ +static inline void +bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { size_t goff; bitmap_t *gp; bitmap_t g; @@ -247,7 +344,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(!bitmap_get(bitmap, binfo, bit)); -#ifdef USE_TREE +#ifdef BITMAP_USE_TREE /* Propagate group state transitions up the tree. */ if (propagate) { unsigned i; @@ -261,14 +358,12 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) == 0); g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; - if (!propagate) + if (!propagate) { break; + } } } -#endif /* USE_TREE */ +#endif /* BITMAP_USE_TREE */ } -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_BITMAP_H */ diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h deleted file mode 100644 index 55df9ac..0000000 --- a/include/jemalloc/internal/chunk.h +++ /dev/null @@ -1,97 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* - * Size and alignment of memory chunks that are allocated by the OS's virtual - * memory system. - */ -#define LG_CHUNK_DEFAULT 21 - -/* Return the chunk address for allocation address a. */ -#define CHUNK_ADDR2BASE(a) \ - ((void *)((uintptr_t)(a) & ~chunksize_mask)) - -/* Return the chunk offset of address a. */ -#define CHUNK_ADDR2OFFSET(a) \ - ((size_t)((uintptr_t)(a) & chunksize_mask)) - -/* Return the smallest chunk multiple that is >= s. */ -#define CHUNK_CEILING(s) \ - (((s) + chunksize_mask) & ~chunksize_mask) - -#define CHUNK_HOOKS_INITIALIZER { \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL \ -} - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern size_t opt_lg_chunk; -extern const char *opt_dss; - -extern rtree_t chunks_rtree; - -extern size_t chunksize; -extern size_t chunksize_mask; /* (chunksize - 1). */ -extern size_t chunk_npages; - -extern const chunk_hooks_t chunk_hooks_default; - -chunk_hooks_t chunk_hooks_get(tsdn_t *tsdn, arena_t *arena); -chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, - const chunk_hooks_t *chunk_hooks); - -bool chunk_register(const void *chunk, const extent_node_t *node, - bool *gdump); -void chunk_deregister(const void *chunk, const extent_node_t *node); -void *chunk_alloc_base(size_t size); -void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - size_t *sn, bool *zero, bool *commit, bool dalloc_node); -void *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - size_t *sn, bool *zero, bool *commit); -void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t sn, - bool committed); -void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t sn, - bool zeroed, bool committed); -bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, - size_t length); -bool chunk_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -extent_node_t *chunk_lookup(const void *chunk, bool dependent); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_CHUNK_C_)) -JEMALLOC_INLINE extent_node_t * -chunk_lookup(const void *ptr, bool dependent) -{ - - return (rtree_get(&chunks_rtree, (uintptr_t)ptr, dependent)); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - -#include "jemalloc/internal/chunk_dss.h" -#include "jemalloc/internal/chunk_mmap.h" diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h deleted file mode 100644 index da8511b..0000000 --- a/include/jemalloc/internal/chunk_dss.h +++ /dev/null @@ -1,37 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef enum { - dss_prec_disabled = 0, - dss_prec_primary = 1, - dss_prec_secondary = 2, - - dss_prec_limit = 3 -} dss_prec_t; -#define DSS_PREC_DEFAULT dss_prec_secondary -#define DSS_DEFAULT "secondary" - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -extern const char *dss_prec_names[]; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -dss_prec_t chunk_dss_prec_get(void); -bool chunk_dss_prec_set(dss_prec_t dss_prec); -void *chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit); -bool chunk_in_dss(void *chunk); -bool chunk_dss_mergeable(void *chunk_a, void *chunk_b); -void chunk_dss_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h deleted file mode 100644 index 6f2d0ac..0000000 --- a/include/jemalloc/internal/chunk_mmap.h +++ /dev/null @@ -1,21 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit); -bool chunk_dalloc_mmap(void *chunk, size_t size); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index f75ad90..7b3850b 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -1,86 +1,101 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_CKH_H +#define JEMALLOC_INTERNAL_CKH_H -typedef struct ckh_s ckh_t; -typedef struct ckhc_s ckhc_t; +#include "jemalloc/internal/tsd.h" -/* Typedefs to allow easy function pointer passing. */ -typedef void ckh_hash_t (const void *, size_t[2]); -typedef bool ckh_keycomp_t (const void *, const void *); +/* Cuckoo hashing implementation. Skip to the end for the interface. */ + +/******************************************************************************/ +/* INTERNAL DEFINITIONS -- IGNORE */ +/******************************************************************************/ /* Maintain counters used to get an idea of performance. */ -/* #define CKH_COUNT */ +/* #define CKH_COUNT */ /* Print counter values in ckh_delete() (requires CKH_COUNT). */ -/* #define CKH_VERBOSE */ +/* #define CKH_VERBOSE */ /* * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit * one bucket per L1 cache line. */ -#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) +#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS +/* Typedefs to allow easy function pointer passing. */ +typedef void ckh_hash_t (const void *, size_t[2]); +typedef bool ckh_keycomp_t (const void *, const void *); /* Hash table cell. */ -struct ckhc_s { - const void *key; - const void *data; -}; +typedef struct { + const void *key; + const void *data; +} ckhc_t; -struct ckh_s { +/* The hash table itself. */ +typedef struct { #ifdef CKH_COUNT /* Counters used to get an idea of performance. */ - uint64_t ngrows; - uint64_t nshrinks; - uint64_t nshrinkfails; - uint64_t ninserts; - uint64_t nrelocs; + uint64_t ngrows; + uint64_t nshrinks; + uint64_t nshrinkfails; + uint64_t ninserts; + uint64_t nrelocs; #endif /* Used for pseudo-random number generation. */ - uint64_t prng_state; + uint64_t prng_state; /* Total number of items. */ - size_t count; + size_t count; /* * Minimum and current number of hash table buckets. There are * 2^LG_CKH_BUCKET_CELLS cells per bucket. */ - unsigned lg_minbuckets; - unsigned lg_curbuckets; + unsigned lg_minbuckets; + unsigned lg_curbuckets; /* Hash and comparison functions. */ - ckh_hash_t *hash; - ckh_keycomp_t *keycomp; + ckh_hash_t *hash; + ckh_keycomp_t *keycomp; /* Hash table with 2^lg_curbuckets buckets. */ - ckhc_t *tab; -}; + ckhc_t *tab; +} ckh_t; -#endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS +/* BEGIN PUBLIC API */ +/******************************************************************************/ -bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +/* Lifetime management. Minitems is the initial capacity. */ +bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp); -void ckh_delete(tsd_t *tsd, ckh_t *ckh); -size_t ckh_count(ckh_t *ckh); -bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, +void ckh_delete(tsd_t *tsd, ckh_t *ckh); + +/* Get the number of elements in the set. */ +size_t ckh_count(ckh_t *ckh); + +/* + * To iterate over the elements in the table, initialize *tabind to 0 and call + * this function until it returns true. Each call that returns false will + * update *key and *data to the next element in the table, assuming the pointers + * are non-NULL. + */ +bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); + +/* + * Basic hash table operations -- insert, removal, lookup. For ckh_remove and + * ckh_search, key or data can be NULL. The hash-table only stores pointers to + * the key and value, and doesn't do any lifetime management. + */ +bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); -bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); -void ckh_string_hash(const void *key, size_t r_hash[2]); -bool ckh_string_keycomp(const void *k1, const void *k2); -void ckh_pointer_hash(const void *key, size_t r_hash[2]); -bool ckh_pointer_keycomp(const void *k1, const void *k2); +bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +/* Some useful hash and comparison functions for strings and pointers. */ +void ckh_string_hash(const void *key, size_t r_hash[2]); +bool ckh_string_keycomp(const void *k1, const void *k2); +void ckh_pointer_hash(const void *key, size_t r_hash[2]); +bool ckh_pointer_keycomp(const void *k1, const void *k2); -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_CKH_H */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index af0f6d7..f159383 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -1,88 +1,107 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct ctl_node_s ctl_node_t; -typedef struct ctl_named_node_s ctl_named_node_t; -typedef struct ctl_indexed_node_s ctl_indexed_node_t; -typedef struct ctl_arena_stats_s ctl_arena_stats_t; -typedef struct ctl_stats_s ctl_stats_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct ctl_node_s { - bool named; -}; - -struct ctl_named_node_s { - struct ctl_node_s node; - const char *name; +#ifndef JEMALLOC_INTERNAL_CTL_H +#define JEMALLOC_INTERNAL_CTL_H + +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" + +/* Maximum ctl tree depth. */ +#define CTL_MAX_DEPTH 7 + +typedef struct ctl_node_s { + bool named; +} ctl_node_t; + +typedef struct ctl_named_node_s { + ctl_node_t node; + const char *name; /* If (nchildren == 0), this is a terminal node. */ - unsigned nchildren; - const ctl_node_t *children; - int (*ctl)(tsd_t *, const size_t *, size_t, void *, - size_t *, void *, size_t); -}; - -struct ctl_indexed_node_s { - struct ctl_node_s node; - const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, + size_t nchildren; + const ctl_node_t *children; + int (*ctl)(tsd_t *, const size_t *, size_t, void *, size_t *, void *, size_t); -}; +} ctl_named_node_t; -struct ctl_arena_stats_s { - bool initialized; - unsigned nthreads; - const char *dss; - ssize_t lg_dirty_mult; - ssize_t decay_time; - size_t pactive; - size_t pdirty; - - /* The remainder are only populated if config_stats is true. */ +typedef struct ctl_indexed_node_s { + struct ctl_node_s node; + const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, + size_t); +} ctl_indexed_node_t; - arena_stats_t astats; +typedef struct ctl_arena_stats_s { + arena_stats_t astats; /* Aggregate stats for small size classes, based on bin stats. */ - size_t allocated_small; - uint64_t nmalloc_small; - uint64_t ndalloc_small; - uint64_t nrequests_small; - - malloc_bin_stats_t bstats[NBINS]; - malloc_large_stats_t *lstats; /* nlclasses elements. */ - malloc_huge_stats_t *hstats; /* nhclasses elements. */ -}; - -struct ctl_stats_s { - size_t allocated; - size_t active; - size_t metadata; - size_t resident; - size_t mapped; - size_t retained; - unsigned narenas; - ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */ + size_t allocated_small; + uint64_t nmalloc_small; + uint64_t ndalloc_small; + uint64_t nrequests_small; + + malloc_bin_stats_t bstats[NBINS]; + malloc_large_stats_t lstats[NSIZES - NBINS]; +} ctl_arena_stats_t; + +typedef struct ctl_stats_s { + size_t allocated; + size_t active; + size_t metadata; + size_t resident; + size_t mapped; + size_t retained; + + background_thread_stats_t background_thread; + mutex_prof_data_t mutex_prof_data[mutex_prof_num_global_mutexes]; +} ctl_stats_t; + +typedef struct ctl_arena_s ctl_arena_t; +struct ctl_arena_s { + unsigned arena_ind; + bool initialized; + ql_elm(ctl_arena_t) destroyed_link; + + /* Basic stats, supported even if !config_stats. */ + unsigned nthreads; + const char *dss; + ssize_t dirty_decay_ms; + ssize_t muzzy_decay_ms; + size_t pactive; + size_t pdirty; + size_t pmuzzy; + + /* NULL if !config_stats. */ + ctl_arena_stats_t *astats; }; -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, +typedef struct ctl_arenas_s { + uint64_t epoch; + unsigned narenas; + ql_head(ctl_arena_t) destroyed; + + /* + * Element 0 corresponds to merged stats for extant arenas (accessed via + * MALLCTL_ARENAS_ALL), element 1 corresponds to merged stats for + * destroyed arenas (accessed via MALLCTL_ARENAS_DESTROYED), and the + * remaining MALLOCX_ARENA_LIMIT elements correspond to arenas. + */ + ctl_arena_t *arenas[2 + MALLOCX_ARENA_LIMIT]; +} ctl_arenas_t; + +int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, +int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, size_t *miblenp); -int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, +int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -bool ctl_boot(void); -void ctl_prefork(tsdn_t *tsdn); -void ctl_postfork_parent(tsdn_t *tsdn); -void ctl_postfork_child(tsdn_t *tsdn); +bool ctl_boot(void); +void ctl_prefork(tsdn_t *tsdn); +void ctl_postfork_parent(tsdn_t *tsdn); +void ctl_postfork_child(tsdn_t *tsdn); -#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ +#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ != 0) { \ malloc_printf( \ @@ -92,7 +111,7 @@ void ctl_postfork_child(tsdn_t *tsdn); } \ } while (0) -#define xmallctlnametomib(name, mibp, miblenp) do { \ +#define xmallctlnametomib(name, mibp, miblenp) do { \ if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ malloc_printf("<jemalloc>: Failure in " \ "xmallctlnametomib(\"%s\", ...)\n", name); \ @@ -100,7 +119,7 @@ void ctl_postfork_child(tsdn_t *tsdn); } \ } while (0) -#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ +#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ newlen) != 0) { \ malloc_write( \ @@ -109,10 +128,4 @@ void ctl_postfork_child(tsdn_t *tsdn); } \ } while (0) -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - +#endif /* JEMALLOC_INTERNAL_CTL_H */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h deleted file mode 100644 index fc77f9f..0000000 --- a/include/jemalloc/internal/extent.h +++ /dev/null @@ -1,275 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct extent_node_s extent_node_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* Tree of extents. Use accessor functions for en_* fields. */ -struct extent_node_s { - /* Arena from which this extent came, if any. */ - arena_t *en_arena; - - /* Pointer to the extent that this tree node is responsible for. */ - void *en_addr; - - /* Total region size. */ - size_t en_size; - - /* - * Serial number (potentially non-unique). - * - * In principle serial numbers can wrap around on 32-bit systems if - * JEMALLOC_MUNMAP is defined, but as long as comparison functions fall - * back on address comparison for equal serial numbers, stable (if - * imperfect) ordering is maintained. - * - * Serial numbers may not be unique even in the absence of wrap-around, - * e.g. when splitting an extent and assigning the same serial number to - * both resulting adjacent extents. - */ - size_t en_sn; - - /* - * The zeroed flag is used by chunk recycling code to track whether - * memory is zero-filled. - */ - bool en_zeroed; - - /* - * True if physical memory is committed to the extent, whether - * explicitly or implicitly as on a system that overcommits and - * satisfies physical memory needs on demand via soft page faults. - */ - bool en_committed; - - /* - * The achunk flag is used to validate that huge allocation lookups - * don't return arena chunks. - */ - bool en_achunk; - - /* Profile counters, used for huge objects. */ - prof_tctx_t *en_prof_tctx; - - /* Linkage for arena's runs_dirty and chunks_cache rings. */ - arena_runs_dirty_link_t rd; - qr(extent_node_t) cc_link; - - union { - /* Linkage for the size/sn/address-ordered tree. */ - rb_node(extent_node_t) szsnad_link; - - /* Linkage for arena's achunks, huge, and node_cache lists. */ - ql_elm(extent_node_t) ql_link; - }; - - /* Linkage for the address-ordered tree. */ - rb_node(extent_node_t) ad_link; -}; -typedef rb_tree(extent_node_t) extent_tree_t; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#ifdef JEMALLOC_JET -size_t extent_size_quantize_floor(size_t size); -#endif -size_t extent_size_quantize_ceil(size_t size); - -rb_proto(, extent_tree_szsnad_, extent_tree_t, extent_node_t) - -rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -arena_t *extent_node_arena_get(const extent_node_t *node); -void *extent_node_addr_get(const extent_node_t *node); -size_t extent_node_size_get(const extent_node_t *node); -size_t extent_node_sn_get(const extent_node_t *node); -bool extent_node_zeroed_get(const extent_node_t *node); -bool extent_node_committed_get(const extent_node_t *node); -bool extent_node_achunk_get(const extent_node_t *node); -prof_tctx_t *extent_node_prof_tctx_get(const extent_node_t *node); -void extent_node_arena_set(extent_node_t *node, arena_t *arena); -void extent_node_addr_set(extent_node_t *node, void *addr); -void extent_node_size_set(extent_node_t *node, size_t size); -void extent_node_sn_set(extent_node_t *node, size_t sn); -void extent_node_zeroed_set(extent_node_t *node, bool zeroed); -void extent_node_committed_set(extent_node_t *node, bool committed); -void extent_node_achunk_set(extent_node_t *node, bool achunk); -void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx); -void extent_node_init(extent_node_t *node, arena_t *arena, void *addr, - size_t size, size_t sn, bool zeroed, bool committed); -void extent_node_dirty_linkage_init(extent_node_t *node); -void extent_node_dirty_insert(extent_node_t *node, - arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty); -void extent_node_dirty_remove(extent_node_t *node); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) -JEMALLOC_INLINE arena_t * -extent_node_arena_get(const extent_node_t *node) -{ - - return (node->en_arena); -} - -JEMALLOC_INLINE void * -extent_node_addr_get(const extent_node_t *node) -{ - - return (node->en_addr); -} - -JEMALLOC_INLINE size_t -extent_node_size_get(const extent_node_t *node) -{ - - return (node->en_size); -} - -JEMALLOC_INLINE size_t -extent_node_sn_get(const extent_node_t *node) -{ - - return (node->en_sn); -} - -JEMALLOC_INLINE bool -extent_node_zeroed_get(const extent_node_t *node) -{ - - return (node->en_zeroed); -} - -JEMALLOC_INLINE bool -extent_node_committed_get(const extent_node_t *node) -{ - - assert(!node->en_achunk); - return (node->en_committed); -} - -JEMALLOC_INLINE bool -extent_node_achunk_get(const extent_node_t *node) -{ - - return (node->en_achunk); -} - -JEMALLOC_INLINE prof_tctx_t * -extent_node_prof_tctx_get(const extent_node_t *node) -{ - - return (node->en_prof_tctx); -} - -JEMALLOC_INLINE void -extent_node_arena_set(extent_node_t *node, arena_t *arena) -{ - - node->en_arena = arena; -} - -JEMALLOC_INLINE void -extent_node_addr_set(extent_node_t *node, void *addr) -{ - - node->en_addr = addr; -} - -JEMALLOC_INLINE void -extent_node_size_set(extent_node_t *node, size_t size) -{ - - node->en_size = size; -} - -JEMALLOC_INLINE void -extent_node_sn_set(extent_node_t *node, size_t sn) -{ - - node->en_sn = sn; -} - -JEMALLOC_INLINE void -extent_node_zeroed_set(extent_node_t *node, bool zeroed) -{ - - node->en_zeroed = zeroed; -} - -JEMALLOC_INLINE void -extent_node_committed_set(extent_node_t *node, bool committed) -{ - - node->en_committed = committed; -} - -JEMALLOC_INLINE void -extent_node_achunk_set(extent_node_t *node, bool achunk) -{ - - node->en_achunk = achunk; -} - -JEMALLOC_INLINE void -extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx) -{ - - node->en_prof_tctx = tctx; -} - -JEMALLOC_INLINE void -extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size, - size_t sn, bool zeroed, bool committed) -{ - - extent_node_arena_set(node, arena); - extent_node_addr_set(node, addr); - extent_node_size_set(node, size); - extent_node_sn_set(node, sn); - extent_node_zeroed_set(node, zeroed); - extent_node_committed_set(node, committed); - extent_node_achunk_set(node, false); - if (config_prof) - extent_node_prof_tctx_set(node, NULL); -} - -JEMALLOC_INLINE void -extent_node_dirty_linkage_init(extent_node_t *node) -{ - - qr_new(&node->rd, rd_link); - qr_new(node, cc_link); -} - -JEMALLOC_INLINE void -extent_node_dirty_insert(extent_node_t *node, - arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty) -{ - - qr_meld(runs_dirty, &node->rd, rd_link); - qr_meld(chunks_dirty, node, cc_link); -} - -JEMALLOC_INLINE void -extent_node_dirty_remove(extent_node_t *node) -{ - - qr_remove(&node->rd, rd_link); - qr_remove(node, cc_link); -} - -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h new file mode 100644 index 0000000..e8f02ce --- /dev/null +++ b/include/jemalloc/internal/extent_dss.h @@ -0,0 +1,26 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_DSS_H +#define JEMALLOC_INTERNAL_EXTENT_DSS_H + +typedef enum { + dss_prec_disabled = 0, + dss_prec_primary = 1, + dss_prec_secondary = 2, + + dss_prec_limit = 3 +} dss_prec_t; +#define DSS_PREC_DEFAULT dss_prec_secondary +#define DSS_DEFAULT "secondary" + +extern const char *dss_prec_names[]; + +extern const char *opt_dss; + +dss_prec_t extent_dss_prec_get(void); +bool extent_dss_prec_set(dss_prec_t dss_prec); +void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit); +bool extent_in_dss(void *addr); +bool extent_dss_mergeable(void *addr_a, void *addr_b); +void extent_dss_boot(void); + +#endif /* JEMALLOC_INTERNAL_EXTENT_DSS_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h new file mode 100644 index 0000000..489a813 --- /dev/null +++ b/include/jemalloc/internal/extent_externs.h @@ -0,0 +1,72 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_EXTERNS_H +#define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H + +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_pool.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/rb.h" +#include "jemalloc/internal/rtree.h" + +extern rtree_t extents_rtree; +extern const extent_hooks_t extent_hooks_default; +extern mutex_pool_t extent_mutex_pool; + +extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); +void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); + +extent_hooks_t *extent_hooks_get(arena_t *arena); +extent_hooks_t *extent_hooks_set(tsd_t *tsd, arena_t *arena, + extent_hooks_t *extent_hooks); + +#ifdef JEMALLOC_JET +size_t extent_size_quantize_floor(size_t size); +size_t extent_size_quantize_ceil(size_t size); +#endif + +rb_proto(, extent_avail_, extent_tree_t, extent_t) +ph_proto(, extent_heap_, extent_heap_t, extent_t) + +bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, + bool delay_coalesce); +extent_state_t extents_state_get(const extents_t *extents); +size_t extents_npages_get(extents_t *extents); +extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, + size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit); +void extents_dalloc(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent); +extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min); +void extents_prefork(tsdn_t *tsdn, extents_t *extents); +void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents); +void extents_postfork_child(tsdn_t *tsdn, extents_t *extents); +extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); +void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); +void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); +void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); +bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); +bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b); + +bool extent_boot(void); + +#endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */ diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h new file mode 100644 index 0000000..bb2bd69 --- /dev/null +++ b/include/jemalloc/internal/extent_inlines.h @@ -0,0 +1,407 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_INLINES_H +#define JEMALLOC_INTERNAL_EXTENT_INLINES_H + +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_pool.h" +#include "jemalloc/internal/pages.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sz.h" + +static inline void +extent_lock(tsdn_t *tsdn, extent_t *extent) { + assert(extent != NULL); + mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)extent); +} + +static inline void +extent_unlock(tsdn_t *tsdn, extent_t *extent) { + assert(extent != NULL); + mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)extent); +} + +static inline void +extent_lock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { + assert(extent1 != NULL && extent2 != NULL); + mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, + (uintptr_t)extent2); +} + +static inline void +extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { + assert(extent1 != NULL && extent2 != NULL); + mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, + (uintptr_t)extent2); +} + +static inline arena_t * +extent_arena_get(const extent_t *extent) { + unsigned arena_ind = (unsigned)((extent->e_bits & + EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT); + /* + * The following check is omitted because we should never actually read + * a NULL arena pointer. + */ + if (false && arena_ind >= MALLOCX_ARENA_LIMIT) { + return NULL; + } + assert(arena_ind < MALLOCX_ARENA_LIMIT); + return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_ACQUIRE); +} + +static inline szind_t +extent_szind_get_maybe_invalid(const extent_t *extent) { + szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> + EXTENT_BITS_SZIND_SHIFT); + assert(szind <= NSIZES); + return szind; +} + +static inline szind_t +extent_szind_get(const extent_t *extent) { + szind_t szind = extent_szind_get_maybe_invalid(extent); + assert(szind < NSIZES); /* Never call when "invalid". */ + return szind; +} + +static inline size_t +extent_usize_get(const extent_t *extent) { + return sz_index2size(extent_szind_get(extent)); +} + +static inline size_t +extent_sn_get(const extent_t *extent) { + return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >> + EXTENT_BITS_SN_SHIFT); +} + +static inline extent_state_t +extent_state_get(const extent_t *extent) { + return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >> + EXTENT_BITS_STATE_SHIFT); +} + +static inline bool +extent_zeroed_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_ZEROED_MASK) >> + EXTENT_BITS_ZEROED_SHIFT); +} + +static inline bool +extent_committed_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_COMMITTED_MASK) >> + EXTENT_BITS_COMMITTED_SHIFT); +} + +static inline bool +extent_slab_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> + EXTENT_BITS_SLAB_SHIFT); +} + +static inline unsigned +extent_nfree_get(const extent_t *extent) { + assert(extent_slab_get(extent)); + return (unsigned)((extent->e_bits & EXTENT_BITS_NFREE_MASK) >> + EXTENT_BITS_NFREE_SHIFT); +} + +static inline void * +extent_base_get(const extent_t *extent) { + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent_slab_get(extent)); + return PAGE_ADDR2BASE(extent->e_addr); +} + +static inline void * +extent_addr_get(const extent_t *extent) { + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent_slab_get(extent)); + return extent->e_addr; +} + +static inline size_t +extent_size_get(const extent_t *extent) { + return (extent->e_size_esn & EXTENT_SIZE_MASK); +} + +static inline size_t +extent_esn_get(const extent_t *extent) { + return (extent->e_size_esn & EXTENT_ESN_MASK); +} + +static inline size_t +extent_bsize_get(const extent_t *extent) { + return extent->e_bsize; +} + +static inline void * +extent_before_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) - PAGE); +} + +static inline void * +extent_last_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent) - PAGE); +} + +static inline void * +extent_past_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent)); +} + +static inline arena_slab_data_t * +extent_slab_data_get(extent_t *extent) { + assert(extent_slab_get(extent)); + return &extent->e_slab_data; +} + +static inline const arena_slab_data_t * +extent_slab_data_get_const(const extent_t *extent) { + assert(extent_slab_get(extent)); + return &extent->e_slab_data; +} + +static inline prof_tctx_t * +extent_prof_tctx_get(const extent_t *extent) { + return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx, + ATOMIC_ACQUIRE); +} + +static inline void +extent_arena_set(extent_t *extent, arena_t *arena) { + unsigned arena_ind = (arena != NULL) ? arena_ind_get(arena) : ((1U << + MALLOCX_ARENA_BITS) - 1); + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) | + ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); +} + +static inline void +extent_addr_set(extent_t *extent, void *addr) { + extent->e_addr = addr; +} + +static inline void +extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) { + assert(extent_base_get(extent) == extent_addr_get(extent)); + + if (alignment < PAGE) { + unsigned lg_range = LG_PAGE - + lg_floor(CACHELINE_CEILING(alignment)); + size_t r = + prng_lg_range_zu(&extent_arena_get(extent)->offset_state, + lg_range, true); + uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - + lg_range); + extent->e_addr = (void *)((uintptr_t)extent->e_addr + + random_offset); + assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == + extent->e_addr); + } +} + +static inline void +extent_size_set(extent_t *extent, size_t size) { + assert((size & ~EXTENT_SIZE_MASK) == 0); + extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK); +} + +static inline void +extent_esn_set(extent_t *extent, size_t esn) { + extent->e_size_esn = (extent->e_size_esn & ~EXTENT_ESN_MASK) | (esn & + EXTENT_ESN_MASK); +} + +static inline void +extent_bsize_set(extent_t *extent, size_t bsize) { + extent->e_bsize = bsize; +} + +static inline void +extent_szind_set(extent_t *extent, szind_t szind) { + assert(szind <= NSIZES); /* NSIZES means "invalid". */ + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) | + ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); +} + +static inline void +extent_nfree_set(extent_t *extent, unsigned nfree) { + assert(extent_slab_get(extent)); + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_NFREE_MASK) | + ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_inc(extent_t *extent) { + assert(extent_slab_get(extent)); + extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_dec(extent_t *extent) { + assert(extent_slab_get(extent)); + extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_sn_set(extent_t *extent, size_t sn) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | + ((uint64_t)sn << EXTENT_BITS_SN_SHIFT); +} + +static inline void +extent_state_set(extent_t *extent, extent_state_t state) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_STATE_MASK) | + ((uint64_t)state << EXTENT_BITS_STATE_SHIFT); +} + +static inline void +extent_zeroed_set(extent_t *extent, bool zeroed) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ZEROED_MASK) | + ((uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT); +} + +static inline void +extent_committed_set(extent_t *extent, bool committed) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK) | + ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT); +} + +static inline void +extent_slab_set(extent_t *extent, bool slab) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | + ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT); +} + +static inline void +extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { + atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE); +} + +static inline void +extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, + bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, + bool committed) { + assert(addr == PAGE_ADDR2BASE(addr) || !slab); + + extent_arena_set(extent, arena); + extent_addr_set(extent, addr); + extent_size_set(extent, size); + extent_slab_set(extent, slab); + extent_szind_set(extent, szind); + extent_sn_set(extent, sn); + extent_state_set(extent, state); + extent_zeroed_set(extent, zeroed); + extent_committed_set(extent, committed); + ql_elm_new(extent, ql_link); + if (config_prof) { + extent_prof_tctx_set(extent, NULL); + } +} + +static inline void +extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { + extent_arena_set(extent, NULL); + extent_addr_set(extent, addr); + extent_bsize_set(extent, bsize); + extent_slab_set(extent, false); + extent_szind_set(extent, NSIZES); + extent_sn_set(extent, sn); + extent_state_set(extent, extent_state_active); + extent_zeroed_set(extent, true); + extent_committed_set(extent, true); +} + +static inline void +extent_list_init(extent_list_t *list) { + ql_new(list); +} + +static inline extent_t * +extent_list_first(const extent_list_t *list) { + return ql_first(list); +} + +static inline extent_t * +extent_list_last(const extent_list_t *list) { + return ql_last(list, ql_link); +} + +static inline void +extent_list_append(extent_list_t *list, extent_t *extent) { + ql_tail_insert(list, extent, ql_link); +} + +static inline void +extent_list_replace(extent_list_t *list, extent_t *to_remove, + extent_t *to_insert) { + ql_after_insert(to_remove, to_insert, ql_link); + ql_remove(list, to_remove, ql_link); +} + +static inline void +extent_list_remove(extent_list_t *list, extent_t *extent) { + ql_remove(list, extent, ql_link); +} + +static inline int +extent_sn_comp(const extent_t *a, const extent_t *b) { + size_t a_sn = extent_sn_get(a); + size_t b_sn = extent_sn_get(b); + + return (a_sn > b_sn) - (a_sn < b_sn); +} + +static inline int +extent_esn_comp(const extent_t *a, const extent_t *b) { + size_t a_esn = extent_esn_get(a); + size_t b_esn = extent_esn_get(b); + + return (a_esn > b_esn) - (a_esn < b_esn); +} + +static inline int +extent_ad_comp(const extent_t *a, const extent_t *b) { + uintptr_t a_addr = (uintptr_t)extent_addr_get(a); + uintptr_t b_addr = (uintptr_t)extent_addr_get(b); + + return (a_addr > b_addr) - (a_addr < b_addr); +} + +static inline int +extent_ead_comp(const extent_t *a, const extent_t *b) { + uintptr_t a_eaddr = (uintptr_t)a; + uintptr_t b_eaddr = (uintptr_t)b; + + return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr); +} + +static inline int +extent_snad_comp(const extent_t *a, const extent_t *b) { + int ret; + + ret = extent_sn_comp(a, b); + if (ret != 0) { + return ret; + } + + ret = extent_ad_comp(a, b); + return ret; +} + +static inline int +extent_esnead_comp(const extent_t *a, const extent_t *b) { + int ret; + + ret = extent_esn_comp(a, b); + if (ret != 0) { + return ret; + } + + ret = extent_ead_comp(a, b); + return ret; +} + +#endif /* JEMALLOC_INTERNAL_EXTENT_INLINES_H */ diff --git a/include/jemalloc/internal/extent_mmap.h b/include/jemalloc/internal/extent_mmap.h new file mode 100644 index 0000000..55f17ee --- /dev/null +++ b/include/jemalloc/internal/extent_mmap.h @@ -0,0 +1,10 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H +#define JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H + +extern bool opt_retain; + +void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit); +bool extent_dalloc_mmap(void *addr, size_t size); + +#endif /* JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H */ diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h new file mode 100644 index 0000000..d297950 --- /dev/null +++ b/include/jemalloc/internal/extent_structs.h @@ -0,0 +1,199 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_STRUCTS_H +#define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/rb.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/size_classes.h" + +typedef enum { + extent_state_active = 0, + extent_state_dirty = 1, + extent_state_muzzy = 2, + extent_state_retained = 3 +} extent_state_t; + +/* Extent (span of pages). Use accessor functions for e_* fields. */ +struct extent_s { + /* + * Bitfield containing several fields: + * + * a: arena_ind + * b: slab + * c: committed + * z: zeroed + * t: state + * i: szind + * f: nfree + * n: sn + * + * nnnnnnnn ... nnnnnfff fffffffi iiiiiiit tzcbaaaa aaaaaaaa + * + * arena_ind: Arena from which this extent came, or all 1 bits if + * unassociated. + * + * slab: The slab flag indicates whether the extent is used for a slab + * of small regions. This helps differentiate small size classes, + * and it indicates whether interior pointers can be looked up via + * iealloc(). + * + * committed: The committed flag indicates whether physical memory is + * committed to the extent, whether explicitly or implicitly + * as on a system that overcommits and satisfies physical + * memory needs on demand via soft page faults. + * + * zeroed: The zeroed flag is used by extent recycling code to track + * whether memory is zero-filled. + * + * state: The state flag is an extent_state_t. + * + * szind: The szind flag indicates usable size class index for + * allocations residing in this extent, regardless of whether the + * extent is a slab. Extent size and usable size often differ + * even for non-slabs, either due to sz_large_pad or promotion of + * sampled small regions. + * + * nfree: Number of free regions in slab. + * + * sn: Serial number (potentially non-unique). + * + * Serial numbers may wrap around if !opt_retain, but as long as + * comparison functions fall back on address comparison for equal + * serial numbers, stable (if imperfect) ordering is maintained. + * + * Serial numbers may not be unique even in the absence of + * wrap-around, e.g. when splitting an extent and assigning the same + * serial number to both resulting adjacent extents. + */ + uint64_t e_bits; +#define EXTENT_BITS_ARENA_SHIFT 0 +#define EXTENT_BITS_ARENA_MASK \ + (((uint64_t)(1U << MALLOCX_ARENA_BITS) - 1) << EXTENT_BITS_ARENA_SHIFT) + +#define EXTENT_BITS_SLAB_SHIFT MALLOCX_ARENA_BITS +#define EXTENT_BITS_SLAB_MASK \ + ((uint64_t)0x1U << EXTENT_BITS_SLAB_SHIFT) + +#define EXTENT_BITS_COMMITTED_SHIFT (MALLOCX_ARENA_BITS + 1) +#define EXTENT_BITS_COMMITTED_MASK \ + ((uint64_t)0x1U << EXTENT_BITS_COMMITTED_SHIFT) + +#define EXTENT_BITS_ZEROED_SHIFT (MALLOCX_ARENA_BITS + 2) +#define EXTENT_BITS_ZEROED_MASK \ + ((uint64_t)0x1U << EXTENT_BITS_ZEROED_SHIFT) + +#define EXTENT_BITS_STATE_SHIFT (MALLOCX_ARENA_BITS + 3) +#define EXTENT_BITS_STATE_MASK \ + ((uint64_t)0x3U << EXTENT_BITS_STATE_SHIFT) + +#define EXTENT_BITS_SZIND_SHIFT (MALLOCX_ARENA_BITS + 5) +#define EXTENT_BITS_SZIND_MASK \ + (((uint64_t)(1U << LG_CEIL_NSIZES) - 1) << EXTENT_BITS_SZIND_SHIFT) + +#define EXTENT_BITS_NFREE_SHIFT \ + (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES) +#define EXTENT_BITS_NFREE_MASK \ + ((uint64_t)((1U << (LG_SLAB_MAXREGS + 1)) - 1) << EXTENT_BITS_NFREE_SHIFT) + +#define EXTENT_BITS_SN_SHIFT \ + (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES + (LG_SLAB_MAXREGS + 1)) +#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) + + /* Pointer to the extent that this structure is responsible for. */ + void *e_addr; + + union { + /* + * Extent size and serial number associated with the extent + * structure (different than the serial number for the extent at + * e_addr). + * + * ssssssss [...] ssssssss ssssnnnn nnnnnnnn + */ + size_t e_size_esn; + #define EXTENT_SIZE_MASK ((size_t)~(PAGE-1)) + #define EXTENT_ESN_MASK ((size_t)PAGE-1) + /* Base extent size, which may not be a multiple of PAGE. */ + size_t e_bsize; + }; + + union { + /* + * List linkage, used by a variety of lists: + * - arena_bin_t's slabs_full + * - extents_t's LRU + * - stashed dirty extents + * - arena's large allocations + */ + ql_elm(extent_t) ql_link; + /* Red-black tree linkage, used by arena's extent_avail. */ + rb_node(extent_t) rb_link; + }; + + /* Linkage for per size class sn/address-ordered heaps. */ + phn(extent_t) ph_link; + + union { + /* Small region slab metadata. */ + arena_slab_data_t e_slab_data; + + /* + * Profile counters, used for large objects. Points to a + * prof_tctx_t. + */ + atomic_p_t e_prof_tctx; + }; +}; +typedef ql_head(extent_t) extent_list_t; +typedef rb_tree(extent_t) extent_tree_t; +typedef ph(extent_t) extent_heap_t; + +/* Quantized collection of extents, with built-in LRU queue. */ +struct extents_s { + malloc_mutex_t mtx; + + /* + * Quantized per size class heaps of extents. + * + * Synchronization: mtx. + */ + extent_heap_t heaps[NPSIZES+1]; + + /* + * Bitmap for which set bits correspond to non-empty heaps. + * + * Synchronization: mtx. + */ + bitmap_t bitmap[BITMAP_GROUPS(NPSIZES+1)]; + + /* + * LRU of all extents in heaps. + * + * Synchronization: mtx. + */ + extent_list_t lru; + + /* + * Page sum for all extents in heaps. + * + * The synchronization here is a little tricky. Modifications to npages + * must hold mtx, but reads need not (though, a reader who sees npages + * without holding the mutex can't assume anything about the rest of the + * state of the extents_t). + */ + atomic_zu_t npages; + + /* All stored extents must be in the same state. */ + extent_state_t state; + + /* + * If true, delay coalescing until eviction; otherwise coalesce during + * deallocation. + */ + bool delay_coalesce; +}; + +#endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */ diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h new file mode 100644 index 0000000..b6905ce --- /dev/null +++ b/include/jemalloc/internal/extent_types.h @@ -0,0 +1,9 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_TYPES_H +#define JEMALLOC_INTERNAL_EXTENT_TYPES_H + +typedef struct extent_s extent_t; +typedef struct extents_s extents_t; + +#define EXTENT_HOOKS_INITIALIZER NULL + +#endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 1ff2d9a..188296c 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -1,109 +1,76 @@ +#ifndef JEMALLOC_INTERNAL_HASH_H +#define JEMALLOC_INTERNAL_HASH_H + +#include "jemalloc/internal/assert.h" + /* * The following hash function is based on MurmurHash3, placed into the public * domain by Austin Appleby. See https://github.com/aappleby/smhasher for * details. */ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -uint32_t hash_x86_32(const void *key, int len, uint32_t seed); -void hash_x86_128(const void *key, const int len, uint32_t seed, - uint64_t r_out[2]); -void hash_x64_128(const void *key, const int len, const uint32_t seed, - uint64_t r_out[2]); -void hash(const void *key, size_t len, const uint32_t seed, - size_t r_hash[2]); -#endif -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) /******************************************************************************/ /* Internal implementation. */ -JEMALLOC_INLINE uint32_t -hash_rotl_32(uint32_t x, int8_t r) -{ - +static inline uint32_t +hash_rotl_32(uint32_t x, int8_t r) { return ((x << r) | (x >> (32 - r))); } -JEMALLOC_INLINE uint64_t -hash_rotl_64(uint64_t x, int8_t r) -{ - +static inline uint64_t +hash_rotl_64(uint64_t x, int8_t r) { return ((x << r) | (x >> (64 - r))); } -JEMALLOC_INLINE uint32_t -hash_get_block_32(const uint32_t *p, int i) -{ - +static inline uint32_t +hash_get_block_32(const uint32_t *p, int i) { /* Handle unaligned read. */ if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { uint32_t ret; memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t)); - return (ret); + return ret; } - return (p[i]); + return p[i]; } -JEMALLOC_INLINE uint64_t -hash_get_block_64(const uint64_t *p, int i) -{ - +static inline uint64_t +hash_get_block_64(const uint64_t *p, int i) { /* Handle unaligned read. */ if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { uint64_t ret; memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t)); - return (ret); + return ret; } - return (p[i]); + return p[i]; } -JEMALLOC_INLINE uint32_t -hash_fmix_32(uint32_t h) -{ - +static inline uint32_t +hash_fmix_32(uint32_t h) { h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; h *= 0xc2b2ae35; h ^= h >> 16; - return (h); + return h; } -JEMALLOC_INLINE uint64_t -hash_fmix_64(uint64_t k) -{ - +static inline uint64_t +hash_fmix_64(uint64_t k) { k ^= k >> 33; k *= KQU(0xff51afd7ed558ccd); k ^= k >> 33; k *= KQU(0xc4ceb9fe1a85ec53); k ^= k >> 33; - return (k); + return k; } -JEMALLOC_INLINE uint32_t -hash_x86_32(const void *key, int len, uint32_t seed) -{ +static inline uint32_t +hash_x86_32(const void *key, int len, uint32_t seed) { const uint8_t *data = (const uint8_t *) key; const int nblocks = len / 4; @@ -149,13 +116,12 @@ hash_x86_32(const void *key, int len, uint32_t seed) h1 = hash_fmix_32(h1); - return (h1); + return h1; } -UNUSED JEMALLOC_INLINE void +UNUSED static inline void hash_x86_128(const void *key, const int len, uint32_t seed, - uint64_t r_out[2]) -{ + uint64_t r_out[2]) { const uint8_t * data = (const uint8_t *) key; const int nblocks = len / 16; @@ -254,10 +220,9 @@ hash_x86_128(const void *key, const int len, uint32_t seed, r_out[1] = (((uint64_t) h4) << 32) | h3; } -UNUSED JEMALLOC_INLINE void +UNUSED static inline void hash_x64_128(const void *key, const int len, const uint32_t seed, - uint64_t r_out[2]) -{ + uint64_t r_out[2]) { const uint8_t *data = (const uint8_t *) key; const int nblocks = len / 16; @@ -334,10 +299,8 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, /******************************************************************************/ /* API. */ -JEMALLOC_INLINE void -hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) -{ - +static inline void +hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) { assert(len <= INT_MAX); /* Unfortunate implementation limitation. */ #if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) @@ -351,7 +314,5 @@ hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) } #endif } -#endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_HASH_H */ diff --git a/include/jemalloc/internal/hooks.h b/include/jemalloc/internal/hooks.h new file mode 100644 index 0000000..cd49afc --- /dev/null +++ b/include/jemalloc/internal/hooks.h @@ -0,0 +1,19 @@ +#ifndef JEMALLOC_INTERNAL_HOOKS_H +#define JEMALLOC_INTERNAL_HOOKS_H + +extern JEMALLOC_EXPORT void (*hooks_arena_new_hook)(); +extern JEMALLOC_EXPORT void (*hooks_libc_hook)(); + +#define JEMALLOC_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn) + +#define open JEMALLOC_HOOK(open, hooks_libc_hook) +#define read JEMALLOC_HOOK(read, hooks_libc_hook) +#define write JEMALLOC_HOOK(write, hooks_libc_hook) +#define readlink JEMALLOC_HOOK(readlink, hooks_libc_hook) +#define close JEMALLOC_HOOK(close, hooks_libc_hook) +#define creat JEMALLOC_HOOK(creat, hooks_libc_hook) +#define secure_getenv JEMALLOC_HOOK(secure_getenv, hooks_libc_hook) +/* Note that this is undef'd and re-define'd in src/prof.c. */ +#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook) + +#endif /* JEMALLOC_INTERNAL_HOOKS_H */ diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h deleted file mode 100644 index 22184d9..0000000 --- a/include/jemalloc/internal/huge.h +++ /dev/null @@ -1,35 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); -void *huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero); -bool huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, - size_t usize_min, size_t usize_max, bool zero); -void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, - size_t usize, size_t alignment, bool zero, tcache_t *tcache); -#ifdef JEMALLOC_JET -typedef void (huge_dalloc_junk_t)(void *, size_t); -extern huge_dalloc_junk_t *huge_dalloc_junk; -#endif -void huge_dalloc(tsdn_t *tsdn, void *ptr); -arena_t *huge_aalloc(const void *ptr); -size_t huge_salloc(tsdn_t *tsdn, const void *ptr); -prof_tctx_t *huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr); -void huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); -void huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in deleted file mode 100644 index e3b499a..0000000 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ /dev/null @@ -1,1294 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_H -#define JEMALLOC_INTERNAL_H - -#include "jemalloc_internal_defs.h" -#include "jemalloc/internal/jemalloc_internal_decls.h" - -#ifdef JEMALLOC_UTRACE -#include <sys/ktrace.h> -#endif - -#define JEMALLOC_NO_DEMANGLE -#ifdef JEMALLOC_JET -# define JEMALLOC_N(n) jet_##n -# include "jemalloc/internal/public_namespace.h" -# define JEMALLOC_NO_RENAME -# include "../jemalloc@install_suffix@.h" -# undef JEMALLOC_NO_RENAME -#else -# define JEMALLOC_N(n) @private_namespace@##n -# include "../jemalloc@install_suffix@.h" -#endif -#include "jemalloc/internal/private_namespace.h" - -static const bool config_debug = -#ifdef JEMALLOC_DEBUG - true -#else - false -#endif - ; -static const bool have_dss = -#ifdef JEMALLOC_DSS - true -#else - false -#endif - ; -static const bool config_fill = -#ifdef JEMALLOC_FILL - true -#else - false -#endif - ; -static const bool config_lazy_lock = -#ifdef JEMALLOC_LAZY_LOCK - true -#else - false -#endif - ; -static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF; -static const bool config_prof = -#ifdef JEMALLOC_PROF - true -#else - false -#endif - ; -static const bool config_prof_libgcc = -#ifdef JEMALLOC_PROF_LIBGCC - true -#else - false -#endif - ; -static const bool config_prof_libunwind = -#ifdef JEMALLOC_PROF_LIBUNWIND - true -#else - false -#endif - ; -static const bool maps_coalesce = -#ifdef JEMALLOC_MAPS_COALESCE - true -#else - false -#endif - ; -static const bool config_munmap = -#ifdef JEMALLOC_MUNMAP - true -#else - false -#endif - ; -static const bool config_stats = -#ifdef JEMALLOC_STATS - true -#else - false -#endif - ; -static const bool config_tcache = -#ifdef JEMALLOC_TCACHE - true -#else - false -#endif - ; -static const bool config_thp = -#ifdef JEMALLOC_THP - true -#else - false -#endif - ; -static const bool config_tls = -#ifdef JEMALLOC_TLS - true -#else - false -#endif - ; -static const bool config_utrace = -#ifdef JEMALLOC_UTRACE - true -#else - false -#endif - ; -static const bool config_valgrind = -#ifdef JEMALLOC_VALGRIND - true -#else - false -#endif - ; -static const bool config_xmalloc = -#ifdef JEMALLOC_XMALLOC - true -#else - false -#endif - ; -static const bool config_ivsalloc = -#ifdef JEMALLOC_IVSALLOC - true -#else - false -#endif - ; -static const bool config_cache_oblivious = -#ifdef JEMALLOC_CACHE_OBLIVIOUS - true -#else - false -#endif - ; - -#ifdef JEMALLOC_C11ATOMICS -#include <stdatomic.h> -#endif - -#ifdef JEMALLOC_ATOMIC9 -#include <machine/atomic.h> -#endif - -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) -#include <libkern/OSAtomic.h> -#endif - -#ifdef JEMALLOC_ZONE -#include <mach/mach_error.h> -#include <mach/mach_init.h> -#include <mach/vm_map.h> -#endif - -#include "jemalloc/internal/ph.h" -#ifndef __PGI -#define RB_COMPACT -#endif -#include "jemalloc/internal/rb.h" -#include "jemalloc/internal/qr.h" -#include "jemalloc/internal/ql.h" - -/* - * jemalloc can conceptually be broken into components (arena, tcache, etc.), - * but there are circular dependencies that cannot be broken without - * substantial performance degradation. In order to reduce the effect on - * visual code flow, read the header files in multiple passes, with one of the - * following cpp variables defined during each pass: - * - * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data - * types. - * JEMALLOC_H_STRUCTS : Data structures. - * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. - * JEMALLOC_H_INLINES : Inline functions. - */ -/******************************************************************************/ -#define JEMALLOC_H_TYPES - -#include "jemalloc/internal/jemalloc_internal_macros.h" - -/* Page size index type. */ -typedef unsigned pszind_t; - -/* Size class index type. */ -typedef unsigned szind_t; - -/* - * Flags bits: - * - * a: arena - * t: tcache - * 0: unused - * z: zero - * n: alignment - * - * aaaaaaaa aaaatttt tttttttt 0znnnnnn - */ -#define MALLOCX_ARENA_MASK ((int)~0xfffff) -#define MALLOCX_ARENA_MAX 0xffe -#define MALLOCX_TCACHE_MASK ((int)~0xfff000ffU) -#define MALLOCX_TCACHE_MAX 0xffd -#define MALLOCX_LG_ALIGN_MASK ((int)0x3f) -/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ -#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ - (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) -#define MALLOCX_ALIGN_GET(flags) \ - (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) -#define MALLOCX_ZERO_GET(flags) \ - ((bool)(flags & MALLOCX_ZERO)) - -#define MALLOCX_TCACHE_GET(flags) \ - (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> 8)) - 2) -#define MALLOCX_ARENA_GET(flags) \ - (((unsigned)(((unsigned)flags) >> 20)) - 1) - -/* Smallest size class to support. */ -#define TINY_MIN (1U << LG_TINY_MIN) - -/* - * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size - * classes). - */ -#ifndef LG_QUANTUM -# if (defined(__i386__) || defined(_M_IX86)) -# define LG_QUANTUM 4 -# endif -# ifdef __ia64__ -# define LG_QUANTUM 4 -# endif -# ifdef __alpha__ -# define LG_QUANTUM 4 -# endif -# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) -# define LG_QUANTUM 4 -# endif -# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) -# define LG_QUANTUM 4 -# endif -# ifdef __arm__ -# define LG_QUANTUM 3 -# endif -# ifdef __aarch64__ -# define LG_QUANTUM 4 -# endif -# ifdef __hppa__ -# define LG_QUANTUM 4 -# endif -# ifdef __mips__ -# define LG_QUANTUM 3 -# endif -# ifdef __or1k__ -# define LG_QUANTUM 3 -# endif -# ifdef __powerpc__ -# define LG_QUANTUM 4 -# endif -# ifdef __riscv__ -# define LG_QUANTUM 4 -# endif -# ifdef __s390__ -# define LG_QUANTUM 4 -# endif -# ifdef __SH4__ -# define LG_QUANTUM 4 -# endif -# ifdef __tile__ -# define LG_QUANTUM 4 -# endif -# ifdef __le32__ -# define LG_QUANTUM 4 -# endif -# ifndef LG_QUANTUM -# error "Unknown minimum alignment for architecture; specify via " - "--with-lg-quantum" -# endif -#endif - -#define QUANTUM ((size_t)(1U << LG_QUANTUM)) -#define QUANTUM_MASK (QUANTUM - 1) - -/* Return the smallest quantum multiple that is >= a. */ -#define QUANTUM_CEILING(a) \ - (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) - -#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) -#define LONG_MASK (LONG - 1) - -/* Return the smallest long multiple that is >= a. */ -#define LONG_CEILING(a) \ - (((a) + LONG_MASK) & ~LONG_MASK) - -#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) -#define PTR_MASK (SIZEOF_PTR - 1) - -/* Return the smallest (void *) multiple that is >= a. */ -#define PTR_CEILING(a) \ - (((a) + PTR_MASK) & ~PTR_MASK) - -/* - * Maximum size of L1 cache line. This is used to avoid cache line aliasing. - * In addition, this controls the spacing of cacheline-spaced size classes. - * - * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can - * only handle raw constants. - */ -#define LG_CACHELINE 6 -#define CACHELINE 64 -#define CACHELINE_MASK (CACHELINE - 1) - -/* Return the smallest cacheline multiple that is >= s. */ -#define CACHELINE_CEILING(s) \ - (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) - -/* Page size. LG_PAGE is determined by the configure script. */ -#ifdef PAGE_MASK -# undef PAGE_MASK -#endif -#define PAGE ((size_t)(1U << LG_PAGE)) -#define PAGE_MASK ((size_t)(PAGE - 1)) - -/* Return the page base address for the page containing address a. */ -#define PAGE_ADDR2BASE(a) \ - ((void *)((uintptr_t)(a) & ~PAGE_MASK)) - -/* Return the smallest pagesize multiple that is >= s. */ -#define PAGE_CEILING(s) \ - (((s) + PAGE_MASK) & ~PAGE_MASK) - -/* Return the nearest aligned address at or below a. */ -#define ALIGNMENT_ADDR2BASE(a, alignment) \ - ((void *)((uintptr_t)(a) & ((~(alignment)) + 1))) - -/* Return the offset between a and the nearest aligned address at or below a. */ -#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ - ((size_t)((uintptr_t)(a) & (alignment - 1))) - -/* Return the smallest alignment multiple that is >= s. */ -#define ALIGNMENT_CEILING(s, alignment) \ - (((s) + (alignment - 1)) & ((~(alignment)) + 1)) - -/* Declare a variable-length array. */ -#if __STDC_VERSION__ < 199901L -# ifdef _MSC_VER -# include <malloc.h> -# define alloca _alloca -# else -# ifdef JEMALLOC_HAS_ALLOCA_H -# include <alloca.h> -# else -# include <stdlib.h> -# endif -# endif -# define VARIABLE_ARRAY(type, name, count) \ - type *name = alloca(sizeof(type) * (count)) -#else -# define VARIABLE_ARRAY(type, name, count) type name[(count)] -#endif - -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/valgrind.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/witness.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" -#include "jemalloc/internal/prof.h" - -#undef JEMALLOC_H_TYPES -/******************************************************************************/ -#define JEMALLOC_H_STRUCTS - -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/valgrind.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/witness.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/bitmap.h" -#define JEMALLOC_ARENA_STRUCTS_A -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_STRUCTS_A -#include "jemalloc/internal/extent.h" -#define JEMALLOC_ARENA_STRUCTS_B -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_STRUCTS_B -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" -#include "jemalloc/internal/prof.h" - -#include "jemalloc/internal/tsd.h" - -#undef JEMALLOC_H_STRUCTS -/******************************************************************************/ -#define JEMALLOC_H_EXTERNS - -extern bool opt_abort; -extern const char *opt_junk; -extern bool opt_junk_alloc; -extern bool opt_junk_free; -extern size_t opt_quarantine; -extern bool opt_redzone; -extern bool opt_utrace; -extern bool opt_xmalloc; -extern bool opt_zero; -extern unsigned opt_narenas; - -extern bool in_valgrind; - -/* Number of CPUs. */ -extern unsigned ncpus; - -/* Number of arenas used for automatic multiplexing of threads and arenas. */ -extern unsigned narenas_auto; - -/* - * Arenas that are used to service external requests. Not all elements of the - * arenas array are necessarily used; arenas are created lazily as needed. - */ -extern arena_t **arenas; - -/* - * pind2sz_tab encodes the same information as could be computed by - * pind2sz_compute(). - */ -extern size_t const pind2sz_tab[NPSIZES]; -/* - * index2size_tab encodes the same information as could be computed (at - * unacceptable cost in some code paths) by index2size_compute(). - */ -extern size_t const index2size_tab[NSIZES]; -/* - * size2index_tab is a compact lookup table that rounds request sizes up to - * size classes. In order to reduce cache footprint, the table is compressed, - * and all accesses are via size2index(). - */ -extern uint8_t const size2index_tab[]; - -arena_t *a0get(void); -void *a0malloc(size_t size); -void a0dalloc(void *ptr); -void *bootstrap_malloc(size_t size); -void *bootstrap_calloc(size_t num, size_t size); -void bootstrap_free(void *ptr); -unsigned narenas_total_get(void); -arena_t *arena_init(tsdn_t *tsdn, unsigned ind); -arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); -arena_t *arena_choose_hard(tsd_t *tsd, bool internal); -void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); -void thread_allocated_cleanup(tsd_t *tsd); -void thread_deallocated_cleanup(tsd_t *tsd); -void iarena_cleanup(tsd_t *tsd); -void arena_cleanup(tsd_t *tsd); -void arenas_tdata_cleanup(tsd_t *tsd); -void narenas_tdata_cleanup(tsd_t *tsd); -void arenas_tdata_bypass_cleanup(tsd_t *tsd); -void jemalloc_prefork(void); -void jemalloc_postfork_parent(void); -void jemalloc_postfork_child(void); - -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/valgrind.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/witness.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" -#include "jemalloc/internal/prof.h" -#include "jemalloc/internal/tsd.h" - -#undef JEMALLOC_H_EXTERNS -/******************************************************************************/ -#define JEMALLOC_H_INLINES - -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/valgrind.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/tsd.h" -#include "jemalloc/internal/witness.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" - -#ifndef JEMALLOC_ENABLE_INLINE -pszind_t psz2ind(size_t psz); -size_t pind2sz_compute(pszind_t pind); -size_t pind2sz_lookup(pszind_t pind); -size_t pind2sz(pszind_t pind); -size_t psz2u(size_t psz); -szind_t size2index_compute(size_t size); -szind_t size2index_lookup(size_t size); -szind_t size2index(size_t size); -size_t index2size_compute(szind_t index); -size_t index2size_lookup(szind_t index); -size_t index2size(szind_t index); -size_t s2u_compute(size_t size); -size_t s2u_lookup(size_t size); -size_t s2u(size_t size); -size_t sa2u(size_t size, size_t alignment); -arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); -arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); -arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, - bool refresh_if_missing); -arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); -ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_INLINE pszind_t -psz2ind(size_t psz) -{ - - if (unlikely(psz > HUGE_MAXCLASS)) - return (NPSIZES); - { - pszind_t x = lg_floor((psz<<1)-1); - pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - - (LG_SIZE_CLASS_GROUP + LG_PAGE); - pszind_t grp = shift << LG_SIZE_CLASS_GROUP; - - pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? - LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; - - size_t delta_inverse_mask = ZI(-1) << lg_delta; - pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - pszind_t ind = grp + mod; - return (ind); - } -} - -JEMALLOC_INLINE size_t -pind2sz_compute(pszind_t pind) -{ - - { - size_t grp = pind >> LG_SIZE_CLASS_GROUP; - size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_PAGE + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; - - size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_PAGE-1); - size_t mod_size = (mod+1) << lg_delta; - - size_t sz = grp_size + mod_size; - return (sz); - } -} - -JEMALLOC_INLINE size_t -pind2sz_lookup(pszind_t pind) -{ - size_t ret = (size_t)pind2sz_tab[pind]; - assert(ret == pind2sz_compute(pind)); - return (ret); -} - -JEMALLOC_INLINE size_t -pind2sz(pszind_t pind) -{ - - assert(pind < NPSIZES); - return (pind2sz_lookup(pind)); -} - -JEMALLOC_INLINE size_t -psz2u(size_t psz) -{ - - if (unlikely(psz > HUGE_MAXCLASS)) - return (0); - { - size_t x = lg_floor((psz<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? - LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (psz + delta_mask) & ~delta_mask; - return (usize); - } -} - -JEMALLOC_INLINE szind_t -size2index_compute(size_t size) -{ - - if (unlikely(size > HUGE_MAXCLASS)) - return (NSIZES); -#if (NTBINS != 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); - return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); - } -#endif - { - szind_t x = lg_floor((size<<1)-1); - szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : - x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - szind_t grp = shift << LG_SIZE_CLASS_GROUP; - - szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - - size_t delta_inverse_mask = ZI(-1) << lg_delta; - szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - szind_t index = NTBINS + grp + mod; - return (index); - } -} - -JEMALLOC_ALWAYS_INLINE szind_t -size2index_lookup(size_t size) -{ - - assert(size <= LOOKUP_MAXCLASS); - { - szind_t ret = (size2index_tab[(size-1) >> LG_TINY_MIN]); - assert(ret == size2index_compute(size)); - return (ret); - } -} - -JEMALLOC_ALWAYS_INLINE szind_t -size2index(size_t size) -{ - - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) - return (size2index_lookup(size)); - return (size2index_compute(size)); -} - -JEMALLOC_INLINE size_t -index2size_compute(szind_t index) -{ - -#if (NTBINS > 0) - if (index < NTBINS) - return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); -#endif - { - size_t reduced_index = index - NTBINS; - size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; - size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - - 1); - - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_QUANTUM + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; - - size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_QUANTUM-1); - size_t mod_size = (mod+1) << lg_delta; - - size_t usize = grp_size + mod_size; - return (usize); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -index2size_lookup(szind_t index) -{ - size_t ret = (size_t)index2size_tab[index]; - assert(ret == index2size_compute(index)); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE size_t -index2size(szind_t index) -{ - - assert(index < NSIZES); - return (index2size_lookup(index)); -} - -JEMALLOC_ALWAYS_INLINE size_t -s2u_compute(size_t size) -{ - - if (unlikely(size > HUGE_MAXCLASS)) - return (0); -#if (NTBINS > 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); - return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : - (ZU(1) << lg_ceil)); - } -#endif - { - size_t x = lg_floor((size<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (size + delta_mask) & ~delta_mask; - return (usize); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -s2u_lookup(size_t size) -{ - size_t ret = index2size_lookup(size2index_lookup(size)); - - assert(ret == s2u_compute(size)); - return (ret); -} - -/* - * Compute usable size that would result from allocating an object with the - * specified size. - */ -JEMALLOC_ALWAYS_INLINE size_t -s2u(size_t size) -{ - - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) - return (s2u_lookup(size)); - return (s2u_compute(size)); -} - -/* - * Compute usable size that would result from allocating an object with the - * specified size and alignment. - */ -JEMALLOC_ALWAYS_INLINE size_t -sa2u(size_t size, size_t alignment) -{ - size_t usize; - - assert(alignment != 0 && ((alignment - 1) & alignment) == 0); - - /* Try for a small size class. */ - if (size <= SMALL_MAXCLASS && alignment < PAGE) { - /* - * Round size up to the nearest multiple of alignment. - * - * This done, we can take advantage of the fact that for each - * small size class, every object is aligned at the smallest - * power of two that is non-zero in the base two representation - * of the size. For example: - * - * Size | Base 2 | Minimum alignment - * -----+----------+------------------ - * 96 | 1100000 | 32 - * 144 | 10100000 | 32 - * 192 | 11000000 | 64 - */ - usize = s2u(ALIGNMENT_CEILING(size, alignment)); - if (usize < LARGE_MINCLASS) - return (usize); - } - - /* Try for a large size class. */ - if (likely(size <= large_maxclass) && likely(alignment < chunksize)) { - /* - * We can't achieve subpage alignment, so round up alignment - * to the minimum that can actually be supported. - */ - alignment = PAGE_CEILING(alignment); - - /* Make sure result is a large size class. */ - usize = (size <= LARGE_MINCLASS) ? LARGE_MINCLASS : s2u(size); - - /* - * Calculate the size of the over-size run that arena_palloc() - * would need to allocate in order to guarantee the alignment. - */ - if (usize + large_pad + alignment - PAGE <= arena_maxrun) - return (usize); - } - - /* Huge size class. Beware of overflow. */ - - if (unlikely(alignment > HUGE_MAXCLASS)) - return (0); - - /* - * We can't achieve subchunk alignment, so round up alignment to the - * minimum that can actually be supported. - */ - alignment = CHUNK_CEILING(alignment); - - /* Make sure result is a huge size class. */ - if (size <= chunksize) - usize = chunksize; - else { - usize = s2u(size); - if (usize < size) { - /* size_t overflow. */ - return (0); - } - } - - /* - * Calculate the multi-chunk mapping that huge_palloc() would need in - * order to guarantee the alignment. - */ - if (usize + alignment - PAGE < usize) { - /* size_t overflow. */ - return (0); - } - return (usize); -} - -/* Choose an arena based on a per-thread value. */ -JEMALLOC_INLINE arena_t * -arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) -{ - arena_t *ret; - - if (arena != NULL) - return (arena); - - ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd); - if (unlikely(ret == NULL)) - ret = arena_choose_hard(tsd, internal); - - return (ret); -} - -JEMALLOC_INLINE arena_t * -arena_choose(tsd_t *tsd, arena_t *arena) -{ - - return (arena_choose_impl(tsd, arena, false)); -} - -JEMALLOC_INLINE arena_t * -arena_ichoose(tsd_t *tsd, arena_t *arena) -{ - - return (arena_choose_impl(tsd, arena, true)); -} - -JEMALLOC_INLINE arena_tdata_t * -arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) -{ - arena_tdata_t *tdata; - arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); - - if (unlikely(arenas_tdata == NULL)) { - /* arenas_tdata hasn't been initialized yet. */ - return (arena_tdata_get_hard(tsd, ind)); - } - if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) { - /* - * ind is invalid, cache is old (too small), or tdata to be - * initialized. - */ - return (refresh_if_missing ? arena_tdata_get_hard(tsd, ind) : - NULL); - } - - tdata = &arenas_tdata[ind]; - if (likely(tdata != NULL) || !refresh_if_missing) - return (tdata); - return (arena_tdata_get_hard(tsd, ind)); -} - -JEMALLOC_INLINE arena_t * -arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) -{ - arena_t *ret; - - assert(ind <= MALLOCX_ARENA_MAX); - - ret = arenas[ind]; - if (unlikely(ret == NULL)) { - ret = atomic_read_p((void *)&arenas[ind]); - if (init_if_missing && unlikely(ret == NULL)) - ret = arena_init(tsdn, ind); - } - return (ret); -} - -JEMALLOC_INLINE ticker_t * -decay_ticker_get(tsd_t *tsd, unsigned ind) -{ - arena_tdata_t *tdata; - - tdata = arena_tdata_get(tsd, ind, true); - if (unlikely(tdata == NULL)) - return (NULL); - return (&tdata->decay_ticker); -} -#endif - -#include "jemalloc/internal/bitmap.h" -/* - * Include portions of arena.h interleaved with tcache.h in order to resolve - * circular dependencies. - */ -#define JEMALLOC_ARENA_INLINE_A -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_A -#include "jemalloc/internal/tcache.h" -#define JEMALLOC_ARENA_INLINE_B -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_B -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" - -#ifndef JEMALLOC_ENABLE_INLINE -arena_t *iaalloc(const void *ptr); -size_t isalloc(tsdn_t *tsdn, const void *ptr, bool demote); -void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, - tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); -void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, - bool slow_path); -void *ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, bool is_metadata, arena_t *arena); -void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena); -void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); -size_t ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote); -size_t u2rz(size_t usize); -size_t p2rz(tsdn_t *tsdn, const void *ptr); -void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, - bool slow_path); -void idalloc(tsd_t *tsd, void *ptr); -void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); -void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path); -void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, - bool slow_path); -void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache, - arena_t *arena); -void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); -void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero); -bool ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_ALWAYS_INLINE arena_t * -iaalloc(const void *ptr) -{ - - assert(ptr != NULL); - - return (arena_aalloc(ptr)); -} - -/* - * Typical usage: - * tsdn_t *tsdn = [...] - * void *ptr = [...] - * size_t sz = isalloc(tsdn, ptr, config_prof); - */ -JEMALLOC_ALWAYS_INLINE size_t -isalloc(tsdn_t *tsdn, const void *ptr, bool demote) -{ - - assert(ptr != NULL); - /* Demotion only makes sense if config_prof is true. */ - assert(config_prof || !demote); - - return (arena_salloc(tsdn, ptr, demote)); -} - -JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, - bool is_metadata, arena_t *arena, bool slow_path) -{ - void *ret; - - assert(size != 0); - assert(!is_metadata || tcache == NULL); - assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); - - ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); - if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), - isalloc(tsdn, ret, config_prof)); - } - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) -{ - - return (iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd, true), - false, NULL, slow_path)); -} - -JEMALLOC_ALWAYS_INLINE void * -ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, bool is_metadata, arena_t *arena) -{ - void *ret; - - assert(usize != 0); - assert(usize == sa2u(usize, alignment)); - assert(!is_metadata || tcache == NULL); - assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); - - ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); - assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); - if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(tsdn, ret, - config_prof)); - } - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena) -{ - - return (ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena)); -} - -JEMALLOC_ALWAYS_INLINE void * -ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) -{ - - return (ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, - tcache_get(tsd, true), false, NULL)); -} - -JEMALLOC_ALWAYS_INLINE size_t -ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote) -{ - extent_node_t *node; - - /* Return 0 if ptr is not within a chunk managed by jemalloc. */ - node = chunk_lookup(ptr, false); - if (node == NULL) - return (0); - /* Only arena chunks should be looked up via interior pointers. */ - assert(extent_node_addr_get(node) == ptr || - extent_node_achunk_get(node)); - - return (isalloc(tsdn, ptr, demote)); -} - -JEMALLOC_INLINE size_t -u2rz(size_t usize) -{ - size_t ret; - - if (usize <= SMALL_MAXCLASS) { - szind_t binind = size2index(usize); - ret = arena_bin_info[binind].redzone_size; - } else - ret = 0; - - return (ret); -} - -JEMALLOC_INLINE size_t -p2rz(tsdn_t *tsdn, const void *ptr) -{ - size_t usize = isalloc(tsdn, ptr, false); - - return (u2rz(usize)); -} - -JEMALLOC_ALWAYS_INLINE void -idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, - bool slow_path) -{ - - assert(ptr != NULL); - assert(!is_metadata || tcache == NULL); - assert(!is_metadata || iaalloc(ptr)->ind < narenas_auto); - if (config_stats && is_metadata) { - arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsdn, ptr, - config_prof)); - } - - arena_dalloc(tsdn, ptr, tcache, slow_path); -} - -JEMALLOC_ALWAYS_INLINE void -idalloc(tsd_t *tsd, void *ptr) -{ - - idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd, false), false, true); -} - -JEMALLOC_ALWAYS_INLINE void -iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) -{ - - if (slow_path && config_fill && unlikely(opt_quarantine)) - quarantine(tsd, ptr); - else - idalloctm(tsd_tsdn(tsd), ptr, tcache, false, slow_path); -} - -JEMALLOC_ALWAYS_INLINE void -isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path) -{ - - arena_sdalloc(tsdn, ptr, size, tcache, slow_path); -} - -JEMALLOC_ALWAYS_INLINE void -isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path) -{ - - if (slow_path && config_fill && unlikely(opt_quarantine)) - quarantine(tsd, ptr); - else - isdalloct(tsd_tsdn(tsd), ptr, size, tcache, slow_path); -} - -JEMALLOC_ALWAYS_INLINE void * -iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) -{ - void *p; - size_t usize, copysize; - - usize = sa2u(size + extra, alignment); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) - return (NULL); - p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, arena); - if (p == NULL) { - if (extra == 0) - return (NULL); - /* Try again, without extra this time. */ - usize = sa2u(size, alignment); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) - return (NULL); - p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, - arena); - if (p == NULL) - return (NULL); - } - /* - * Copy at most size bytes (not size+extra), since the caller has no - * expectation that the extra bytes will be reliably preserved. - */ - copysize = (size < oldsize) ? size : oldsize; - memcpy(p, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache, true); - return (p); -} - -JEMALLOC_ALWAYS_INLINE void * -iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero, tcache_t *tcache, arena_t *arena) -{ - - assert(ptr != NULL); - assert(size != 0); - - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) - != 0) { - /* - * Existing object alignment is inadequate; allocate new space - * and copy. - */ - return (iralloct_realign(tsd, ptr, oldsize, size, 0, alignment, - zero, tcache, arena)); - } - - return (arena_ralloc(tsd, arena, ptr, oldsize, size, alignment, zero, - tcache)); -} - -JEMALLOC_ALWAYS_INLINE void * -iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero) -{ - - return (iralloct(tsd, ptr, oldsize, size, alignment, zero, - tcache_get(tsd, true), NULL)); -} - -JEMALLOC_ALWAYS_INLINE bool -ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero) -{ - - assert(ptr != NULL); - assert(size != 0); - - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) - != 0) { - /* Existing object alignment is inadequate. */ - return (true); - } - - return (arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero)); -} -#endif - -#include "jemalloc/internal/prof.h" - -#undef JEMALLOC_H_INLINES -/******************************************************************************/ -#endif /* JEMALLOC_INTERNAL_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index c907d91..1efdb56 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -1,5 +1,5 @@ #ifndef JEMALLOC_INTERNAL_DECLS_H -#define JEMALLOC_INTERNAL_DECLS_H +#define JEMALLOC_INTERNAL_DECLS_H #include <math.h> #ifdef _WIN32 @@ -14,6 +14,11 @@ # if !defined(SYS_write) && defined(__NR_write) # define SYS_write __NR_write # endif +# if defined(SYS_open) && defined(__aarch64__) + /* Android headers may define SYS_open to __NR_open even though + * __NR_open may not exist on AArch64 (superseded by __NR_openat). */ +# undef SYS_open +# endif # include <sys/uio.h> # endif # include <pthread.h> @@ -36,6 +41,9 @@ #ifndef SIZE_T_MAX # define SIZE_T_MAX SIZE_MAX #endif +#ifndef SSIZE_MAX +# define SSIZE_MAX ((ssize_t)(SIZE_T_MAX >> 1)) +#endif #include <stdarg.h> #include <stdbool.h> #include <stdio.h> @@ -61,9 +69,7 @@ typedef intptr_t ssize_t; # pragma warning(disable: 4996) #if _MSC_VER < 1800 static int -isblank(int c) -{ - +isblank(int c) { return (c == '\t' || c == ' '); } #endif diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7c88b0d..2bf9dea 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -1,5 +1,5 @@ #ifndef JEMALLOC_INTERNAL_DEFS_H_ -#define JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ /* * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all * public APIs to be prefixed. This makes it possible, with some care, to use @@ -9,6 +9,18 @@ #undef JEMALLOC_CPREFIX /* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +#undef JEMALLOC_OVERRIDE___LIBC_CALLOC +#undef JEMALLOC_OVERRIDE___LIBC_FREE +#undef JEMALLOC_OVERRIDE___LIBC_MALLOC +#undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN +#undef JEMALLOC_OVERRIDE___LIBC_REALLOC +#undef JEMALLOC_OVERRIDE___LIBC_VALLOC +#undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN + +/* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. * For shared libraries, symbol visibility mechanisms prevent these symbols * from being exported, but for static libraries, naming collisions are a real @@ -22,17 +34,21 @@ */ #undef CPU_SPINWAIT +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#undef LG_VADDR + /* Defined if C11 atomics are available. */ -#undef JEMALLOC_C11ATOMICS +#undef JEMALLOC_C11_ATOMICS -/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */ -#undef JEMALLOC_ATOMIC9 +/* Defined if GCC __atomic atomics are available. */ +#undef JEMALLOC_GCC_ATOMIC_ATOMICS -/* - * Defined if OSAtomic*() functions are available, as provided by Darwin, and - * documented in the atomic(3) manual page. - */ -#undef JEMALLOC_OSATOMIC +/* Defined if GCC __sync atomics are available. */ +#undef JEMALLOC_GCC_SYNC_ATOMICS /* * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and @@ -123,12 +139,6 @@ /* Non-empty if the tls_model attribute is supported. */ #undef JEMALLOC_TLS_MODEL -/* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ -#undef JEMALLOC_CC_SILENCE - -/* JEMALLOC_CODE_COVERAGE enables test code coverage analysis. */ -#undef JEMALLOC_CODE_COVERAGE - /* * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables * inline functions. @@ -151,36 +161,23 @@ #undef JEMALLOC_PROF_GCC /* - * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. - * This makes it possible to allocate/deallocate objects without any locking - * when the cache is in the steady state. - */ -#undef JEMALLOC_TCACHE - -/* - * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage * segment (DSS). */ #undef JEMALLOC_DSS -/* Support memory filling (junk/zero/quarantine/redzone). */ +/* Support memory filling (junk/zero). */ #undef JEMALLOC_FILL /* Support utrace(2)-based tracing. */ #undef JEMALLOC_UTRACE -/* Support Valgrind. */ -#undef JEMALLOC_VALGRIND - /* Support optional abort() on OOM. */ #undef JEMALLOC_XMALLOC /* Support lazy locking (avoid locking unless a second thread is launched). */ #undef JEMALLOC_LAZY_LOCK -/* Minimum size class to support is 2^LG_TINY_MIN bytes. */ -#undef LG_TINY_MIN - /* * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size * classes). @@ -191,6 +188,13 @@ #undef LG_PAGE /* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#undef LG_HUGEPAGE + +/* * If defined, adjacent virtual memory mappings with identical attributes * automatically coalesce, and they fragment when changes are made to subranges. * This is the normal order of things for mmap()/munmap(), but on Windows @@ -200,11 +204,12 @@ #undef JEMALLOC_MAPS_COALESCE /* - * If defined, use munmap() to unmap freed chunks, rather than storing them for - * later reuse. This is disabled by default on Linux because common sequences - * of mmap()/munmap() calls will cause virtual memory map holes. + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. */ -#undef JEMALLOC_MUNMAP +#undef JEMALLOC_RETAIN /* TLS is used to map arenas and magazine caches to threads. */ #undef JEMALLOC_TLS @@ -224,12 +229,6 @@ #undef JEMALLOC_INTERNAL_FFS /* - * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside - * within jemalloc-owned chunks before dereferencing them. - */ -#undef JEMALLOC_IVSALLOC - -/* * If defined, explicitly attempt to more uniformly distribute large allocation * pointer alignments across all cache indices. */ @@ -253,24 +252,26 @@ #undef JEMALLOC_HAVE_MADVISE /* - * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE - * arguments to madvise(2). - */ -#undef JEMALLOC_HAVE_MADVISE_HUGE - -/* * Methods for purging unused pages differ between operating systems. * * madvise(..., MADV_FREE) : This marks pages as being unused, such that they * will be discarded rather than swapped out. - * madvise(..., MADV_DONTNEED) : This immediately discards pages, such that - * new pages will be demand-zeroed if the - * address region is later touched. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. */ #undef JEMALLOC_PURGE_MADVISE_FREE #undef JEMALLOC_PURGE_MADVISE_DONTNEED +#undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS -/* Defined if transparent huge page support is enabled. */ +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ #undef JEMALLOC_THP /* Define if operating system has alloca.h header. */ @@ -300,9 +301,26 @@ /* glibc memalign hook. */ #undef JEMALLOC_GLIBC_MEMALIGN_HOOK +/* pthread support */ +#undef JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#undef JEMALLOC_HAVE_DLSYM + /* Adaptive mutex support in pthreads. */ #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP +/* GNU specific sched_getcpu support */ +#undef JEMALLOC_HAVE_SCHED_GETCPU + +/* GNU specific sched_setaffinity support */ +#undef JEMALLOC_HAVE_SCHED_SETAFFINITY + +/* + * If defined, all the features necessary for background threads are present. + */ +#undef JEMALLOC_BACKGROUND_THREAD + /* * If defined, jemalloc symbols are not exported (doesn't work when * JEMALLOC_PREFIX is not defined). @@ -312,4 +330,7 @@ /* config.malloc_conf options string. */ #undef JEMALLOC_CONFIG_MALLOC_CONF +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#undef JEMALLOC_IS_MALLOC + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h new file mode 100644 index 0000000..e10fb27 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -0,0 +1,53 @@ +#ifndef JEMALLOC_INTERNAL_EXTERNS_H +#define JEMALLOC_INTERNAL_EXTERNS_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/tsd_types.h" + +/* TSD checks this to set thread local slow state accordingly. */ +extern bool malloc_slow; + +/* Run-time options. */ +extern bool opt_abort; +extern bool opt_abort_conf; +extern const char *opt_junk; +extern bool opt_junk_alloc; +extern bool opt_junk_free; +extern bool opt_utrace; +extern bool opt_xmalloc; +extern bool opt_zero; +extern unsigned opt_narenas; + +/* Number of CPUs. */ +extern unsigned ncpus; + +/* Number of arenas used for automatic multiplexing of threads and arenas. */ +extern unsigned narenas_auto; + +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. + */ +extern atomic_p_t arenas[]; + +void *a0malloc(size_t size); +void a0dalloc(void *ptr); +void *bootstrap_malloc(size_t size); +void *bootstrap_calloc(size_t num, size_t size); +void bootstrap_free(void *ptr); +void arena_set(unsigned ind, arena_t *arena); +unsigned narenas_total_get(void); +arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); +arena_t *arena_choose_hard(tsd_t *tsd, bool internal); +void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); +void iarena_cleanup(tsd_t *tsd); +void arena_cleanup(tsd_t *tsd); +void arenas_tdata_cleanup(tsd_t *tsd); +void jemalloc_prefork(void); +void jemalloc_postfork_parent(void); +void jemalloc_postfork_child(void); +bool malloc_initialized(void); + +#endif /* JEMALLOC_INTERNAL_EXTERNS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h new file mode 100644 index 0000000..437eaa4 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -0,0 +1,94 @@ +#ifndef JEMALLOC_INTERNAL_INCLUDES_H +#define JEMALLOC_INTERNAL_INCLUDES_H + +/* + * jemalloc can conceptually be broken into components (arena, tcache, etc.), + * but there are circular dependencies that cannot be broken without + * substantial performance degradation. + * + * Historically, we dealt with this by each header into four sections (types, + * structs, externs, and inlines), and included each header file multiple times + * in this file, picking out the portion we want on each pass using the + * following #defines: + * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data + * types. + * JEMALLOC_H_STRUCTS : Data structures. + * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. + * JEMALLOC_H_INLINES : Inline functions. + * + * We're moving toward a world in which the dependencies are explicit; each file + * will #include the headers it depends on (rather than relying on them being + * implicitly available via this file including every header file in the + * project). + * + * We're now in an intermediate state: we've broken up the header files to avoid + * having to include each one multiple times, but have not yet moved the + * dependency information into the header files (i.e. we still rely on the + * ordering in this file to ensure all a header's dependencies are available in + * its translation unit). Each component is now broken up into multiple header + * files, corresponding to the sections above (e.g. instead of "foo.h", we now + * have "foo_types.h", "foo_structs.h", "foo_externs.h", "foo_inlines.h"). + * + * Those files which have been converted to explicitly include their + * inter-component dependencies are now in the initial HERMETIC HEADERS + * section. All headers may still rely on jemalloc_preamble.h (which, by fiat, + * must be included first in every translation unit) for system headers and + * global jemalloc definitions, however. + */ + +/******************************************************************************/ +/* TYPES */ +/******************************************************************************/ + +#include "jemalloc/internal/extent_types.h" +#include "jemalloc/internal/base_types.h" +#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/prof_types.h" + +/******************************************************************************/ +/* STRUCTS */ +/******************************************************************************/ + +#include "jemalloc/internal/arena_structs_a.h" +#include "jemalloc/internal/extent_structs.h" +#include "jemalloc/internal/base_structs.h" +#include "jemalloc/internal/prof_structs.h" +#include "jemalloc/internal/arena_structs_b.h" +#include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/background_thread_structs.h" + +/******************************************************************************/ +/* EXTERNS */ +/******************************************************************************/ + +#include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/extent_externs.h" +#include "jemalloc/internal/base_externs.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/large_externs.h" +#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/background_thread_externs.h" + +/******************************************************************************/ +/* INLINES */ +/******************************************************************************/ + +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/base_inlines.h" +/* + * Include portions of arena code interleaved with tcache code in order to + * resolve circular dependencies. + */ +#include "jemalloc/internal/prof_inlines_a.h" +#include "jemalloc/internal/arena_inlines_a.h" +#include "jemalloc/internal/extent_inlines.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/tcache_inlines.h" +#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/prof_inlines_b.h" +#include "jemalloc/internal/background_thread_inlines.h" + +#endif /* JEMALLOC_INTERNAL_INCLUDES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h new file mode 100644 index 0000000..854fb1e --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -0,0 +1,168 @@ +#ifndef JEMALLOC_INTERNAL_INLINES_A_H +#define JEMALLOC_INTERNAL_INLINES_A_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/ticker.h" + +JEMALLOC_ALWAYS_INLINE malloc_cpuid_t +malloc_getcpu(void) { + assert(have_percpu_arena); +#if defined(JEMALLOC_HAVE_SCHED_GETCPU) + return (malloc_cpuid_t)sched_getcpu(); +#else + not_reached(); + return -1; +#endif +} + +/* Return the chosen arena index based on current cpu. */ +JEMALLOC_ALWAYS_INLINE unsigned +percpu_arena_choose(void) { + assert(have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)); + + malloc_cpuid_t cpuid = malloc_getcpu(); + assert(cpuid >= 0); + + unsigned arena_ind; + if ((opt_percpu_arena == percpu_arena) || ((unsigned)cpuid < ncpus / + 2)) { + arena_ind = cpuid; + } else { + assert(opt_percpu_arena == per_phycpu_arena); + /* Hyper threads on the same physical CPU share arena. */ + arena_ind = cpuid - ncpus / 2; + } + + return arena_ind; +} + +/* Return the limit of percpu auto arena range, i.e. arenas[0...ind_limit). */ +JEMALLOC_ALWAYS_INLINE unsigned +percpu_arena_ind_limit(percpu_arena_mode_t mode) { + assert(have_percpu_arena && PERCPU_ARENA_ENABLED(mode)); + if (mode == per_phycpu_arena && ncpus > 1) { + if (ncpus % 2) { + /* This likely means a misconfig. */ + return ncpus / 2 + 1; + } + return ncpus / 2; + } else { + return ncpus; + } +} + +static inline arena_tdata_t * +arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { + arena_tdata_t *tdata; + arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); + + if (unlikely(arenas_tdata == NULL)) { + /* arenas_tdata hasn't been initialized yet. */ + return arena_tdata_get_hard(tsd, ind); + } + if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) { + /* + * ind is invalid, cache is old (too small), or tdata to be + * initialized. + */ + return (refresh_if_missing ? arena_tdata_get_hard(tsd, ind) : + NULL); + } + + tdata = &arenas_tdata[ind]; + if (likely(tdata != NULL) || !refresh_if_missing) { + return tdata; + } + return arena_tdata_get_hard(tsd, ind); +} + +static inline arena_t * +arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { + arena_t *ret; + + assert(ind < MALLOCX_ARENA_LIMIT); + + ret = (arena_t *)atomic_load_p(&arenas[ind], ATOMIC_ACQUIRE); + if (unlikely(ret == NULL)) { + if (init_if_missing) { + ret = arena_init(tsdn, ind, + (extent_hooks_t *)&extent_hooks_default); + } + } + return ret; +} + +static inline ticker_t * +decay_ticker_get(tsd_t *tsd, unsigned ind) { + arena_tdata_t *tdata; + + tdata = arena_tdata_get(tsd, ind, true); + if (unlikely(tdata == NULL)) { + return NULL; + } + return &tdata->decay_ticker; +} + +JEMALLOC_ALWAYS_INLINE tcache_bin_t * +tcache_small_bin_get(tcache_t *tcache, szind_t binind) { + assert(binind < NBINS); + return &tcache->tbins_small[binind]; +} + +JEMALLOC_ALWAYS_INLINE tcache_bin_t * +tcache_large_bin_get(tcache_t *tcache, szind_t binind) { + assert(binind >= NBINS &&binind < nhbins); + return &tcache->tbins_large[binind - NBINS]; +} + +JEMALLOC_ALWAYS_INLINE bool +tcache_available(tsd_t *tsd) { + /* + * Thread specific auto tcache might be unavailable if: 1) during tcache + * initialization, or 2) disabled through thread.tcache.enabled mallctl + * or config options. This check covers all cases. + */ + if (likely(tsd_tcache_enabled_get(tsd))) { + /* Associated arena == NULL implies tcache init in progress. */ + assert(tsd_tcachep_get(tsd)->arena == NULL || + tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->avail != + NULL); + return true; + } + + return false; +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcache_get(tsd_t *tsd) { + if (!tcache_available(tsd)) { + return NULL; + } + + return tsd_tcachep_get(tsd); +} + +static inline void +pre_reentrancy(tsd_t *tsd) { + bool fast = tsd_fast(tsd); + ++*tsd_reentrancy_levelp_get(tsd); + if (fast) { + /* Prepare slow path for reentrancy. */ + tsd_slow_update(tsd); + assert(tsd->state == tsd_state_nominal_slow); + } +} + +static inline void +post_reentrancy(tsd_t *tsd) { + int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsd); + assert(*reentrancy_level > 0); + if (--*reentrancy_level == 0) { + tsd_slow_update(tsd); + } +} + +#endif /* JEMALLOC_INTERNAL_INLINES_A_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h new file mode 100644 index 0000000..2e76e5d --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -0,0 +1,86 @@ +#ifndef JEMALLOC_INTERNAL_INLINES_B_H +#define JEMALLOC_INTERNAL_INLINES_B_H + +#include "jemalloc/internal/rtree.h" + +/* Choose an arena based on a per-thread value. */ +static inline arena_t * +arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { + arena_t *ret; + + if (arena != NULL) { + return arena; + } + + /* During reentrancy, arena 0 is the safest bet. */ + if (unlikely(tsd_reentrancy_level_get(tsd) > 0)) { + return arena_get(tsd_tsdn(tsd), 0, true); + } + + ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd); + if (unlikely(ret == NULL)) { + ret = arena_choose_hard(tsd, internal); + assert(ret); + if (tcache_available(tsd)) { + tcache_t *tcache = tcache_get(tsd); + if (tcache->arena != NULL) { + /* See comments in tcache_data_init().*/ + assert(tcache->arena == + arena_get(tsd_tsdn(tsd), 0, false)); + if (tcache->arena != ret) { + tcache_arena_reassociate(tsd_tsdn(tsd), + tcache, ret); + } + } else { + tcache_arena_associate(tsd_tsdn(tsd), tcache, + ret); + } + } + } + + /* + * Note that for percpu arena, if the current arena is outside of the + * auto percpu arena range, (i.e. thread is assigned to a manually + * managed arena), then percpu arena is skipped. + */ + if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) && + !internal && (arena_ind_get(ret) < + percpu_arena_ind_limit(opt_percpu_arena)) && (ret->last_thd != + tsd_tsdn(tsd))) { + unsigned ind = percpu_arena_choose(); + if (arena_ind_get(ret) != ind) { + percpu_arena_update(tsd, ind); + ret = tsd_arena_get(tsd); + } + ret->last_thd = tsd_tsdn(tsd); + } + + return ret; +} + +static inline arena_t * +arena_choose(tsd_t *tsd, arena_t *arena) { + return arena_choose_impl(tsd, arena, false); +} + +static inline arena_t * +arena_ichoose(tsd_t *tsd, arena_t *arena) { + return arena_choose_impl(tsd, arena, true); +} + +static inline bool +arena_is_auto(arena_t *arena) { + assert(narenas_auto > 0); + return (arena_ind_get(arena) < narenas_auto); +} + +JEMALLOC_ALWAYS_INLINE extent_t * +iealloc(tsdn_t *tsdn, const void *ptr) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + return rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true); +} + +#endif /* JEMALLOC_INTERNAL_INLINES_B_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h new file mode 100644 index 0000000..7ffce6f --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -0,0 +1,197 @@ +#ifndef JEMALLOC_INTERNAL_INLINES_C_H +#define JEMALLOC_INTERNAL_INLINES_C_H + +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/witness.h" + +JEMALLOC_ALWAYS_INLINE arena_t * +iaalloc(tsdn_t *tsdn, const void *ptr) { + assert(ptr != NULL); + + return arena_aalloc(tsdn, ptr); +} + +JEMALLOC_ALWAYS_INLINE size_t +isalloc(tsdn_t *tsdn, const void *ptr) { + assert(ptr != NULL); + + return arena_salloc(tsdn, ptr); +} + +JEMALLOC_ALWAYS_INLINE void * +iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, + bool is_internal, arena_t *arena, bool slow_path) { + void *ret; + + assert(size != 0); + assert(!is_internal || tcache == NULL); + assert(!is_internal || arena == NULL || arena_is_auto(arena)); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); + if (config_stats && is_internal && likely(ret != NULL)) { + arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); + } + return ret; +} + +JEMALLOC_ALWAYS_INLINE void * +ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { + return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd), false, + NULL, slow_path); +} + +JEMALLOC_ALWAYS_INLINE void * +ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, bool is_internal, arena_t *arena) { + void *ret; + + assert(usize != 0); + assert(usize == sz_sa2u(usize, alignment)); + assert(!is_internal || tcache == NULL); + assert(!is_internal || arena == NULL || arena_is_auto(arena)); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); + assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); + if (config_stats && is_internal && likely(ret != NULL)) { + arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); + } + return ret; +} + +JEMALLOC_ALWAYS_INLINE void * +ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena) { + return ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena); +} + +JEMALLOC_ALWAYS_INLINE void * +ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { + return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, + tcache_get(tsd), false, NULL); +} + +JEMALLOC_ALWAYS_INLINE size_t +ivsalloc(tsdn_t *tsdn, const void *ptr) { + return arena_vsalloc(tsdn, ptr); +} + +JEMALLOC_ALWAYS_INLINE void +idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx, + bool is_internal, bool slow_path) { + assert(ptr != NULL); + assert(!is_internal || tcache == NULL); + assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr))); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + if (config_stats && is_internal) { + arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr)); + } + if (!is_internal && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) { + assert(tcache == NULL); + } + arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path); +} + +JEMALLOC_ALWAYS_INLINE void +idalloc(tsd_t *tsd, void *ptr) { + idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd), NULL, false, true); +} + +JEMALLOC_ALWAYS_INLINE void +isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + alloc_ctx_t *alloc_ctx, bool slow_path) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, tcache_t *tcache, + arena_t *arena) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + void *p; + size_t usize, copysize; + + usize = sz_sa2u(size + extra, alignment); + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + return NULL; + } + p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); + if (p == NULL) { + if (extra == 0) { + return NULL; + } + /* Try again, without extra this time. */ + usize = sz_sa2u(size, alignment); + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + return NULL; + } + p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); + if (p == NULL) { + return NULL; + } + } + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ + copysize = (size < oldsize) ? size : oldsize; + memcpy(p, ptr, copysize); + isdalloct(tsdn, ptr, oldsize, tcache, NULL, true); + return p; +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero, tcache_t *tcache, arena_t *arena) { + assert(ptr != NULL); + assert(size != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + /* + * Existing object alignment is inadequate; allocate new space + * and copy. + */ + return iralloct_realign(tsdn, ptr, oldsize, size, 0, alignment, + zero, tcache, arena); + } + + return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero, + tcache); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero) { + return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero, + tcache_get(tsd), NULL); +} + +JEMALLOC_ALWAYS_INLINE bool +ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) { + assert(ptr != NULL); + assert(size != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + /* Existing object alignment is inadequate. */ + return true; + } + + return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero); +} + +#endif /* JEMALLOC_INTERNAL_INLINES_C_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index a08ba77..4571895 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -1,57 +1,40 @@ -/* - * JEMALLOC_ALWAYS_INLINE and JEMALLOC_INLINE are used within header files for - * functions that are static inline functions if inlining is enabled, and - * single-definition library-private functions if inlining is disabled. - * - * JEMALLOC_ALWAYS_INLINE_C and JEMALLOC_INLINE_C are for use in .c files, in - * which case the denoted functions are always static, regardless of whether - * inlining is enabled. - */ -#if defined(JEMALLOC_DEBUG) || defined(JEMALLOC_CODE_COVERAGE) - /* Disable inlining to make debugging/profiling easier. */ -# define JEMALLOC_ALWAYS_INLINE -# define JEMALLOC_ALWAYS_INLINE_C static -# define JEMALLOC_INLINE -# define JEMALLOC_INLINE_C static -# define inline -#else -# define JEMALLOC_ENABLE_INLINE -# ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ALWAYS_INLINE \ - static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) -# define JEMALLOC_ALWAYS_INLINE_C \ - static inline JEMALLOC_ATTR(always_inline) -# else -# define JEMALLOC_ALWAYS_INLINE static inline -# define JEMALLOC_ALWAYS_INLINE_C static inline -# endif -# define JEMALLOC_INLINE static inline -# define JEMALLOC_INLINE_C static inline -# ifdef _MSC_VER -# define inline _inline -# endif -#endif +#ifndef JEMALLOC_INTERNAL_MACROS_H +#define JEMALLOC_INTERNAL_MACROS_H -#ifdef JEMALLOC_CC_SILENCE -# define UNUSED JEMALLOC_ATTR(unused) +#ifdef JEMALLOC_DEBUG +# define JEMALLOC_ALWAYS_INLINE static inline #else -# define UNUSED +# define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline #endif +#ifdef _MSC_VER +# define inline _inline +#endif + +#define UNUSED JEMALLOC_ATTR(unused) -#define ZU(z) ((size_t)z) -#define ZI(z) ((ssize_t)z) -#define QU(q) ((uint64_t)q) -#define QI(q) ((int64_t)q) +#define ZU(z) ((size_t)z) +#define ZD(z) ((ssize_t)z) +#define QU(q) ((uint64_t)q) +#define QD(q) ((int64_t)q) -#define KZU(z) ZU(z##ULL) -#define KZI(z) ZI(z##LL) -#define KQU(q) QU(q##ULL) -#define KQI(q) QI(q##LL) +#define KZU(z) ZU(z##ULL) +#define KZD(z) ZD(z##LL) +#define KQU(q) QU(q##ULL) +#define KQD(q) QI(q##LL) #ifndef __DECONST # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) #endif -#ifndef JEMALLOC_HAS_RESTRICT +#if !defined(JEMALLOC_HAS_RESTRICT) || defined(__cplusplus) # define restrict #endif + +/* Various function pointers are statick and immutable except during testing. */ +#ifdef JEMALLOC_JET +# define JET_MUTABLE +#else +# define JET_MUTABLE const +#endif + +#endif /* JEMALLOC_INTERNAL_MACROS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h new file mode 100644 index 0000000..50f9d00 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -0,0 +1,178 @@ +#ifndef JEMALLOC_INTERNAL_TYPES_H +#define JEMALLOC_INTERNAL_TYPES_H + +/* Page size index type. */ +typedef unsigned pszind_t; + +/* Size class index type. */ +typedef unsigned szind_t; + +/* Processor / core id type. */ +typedef int malloc_cpuid_t; + +/* + * Flags bits: + * + * a: arena + * t: tcache + * 0: unused + * z: zero + * n: alignment + * + * aaaaaaaa aaaatttt tttttttt 0znnnnnn + */ +#define MALLOCX_ARENA_BITS 12 +#define MALLOCX_TCACHE_BITS 12 +#define MALLOCX_LG_ALIGN_BITS 6 +#define MALLOCX_ARENA_SHIFT 20 +#define MALLOCX_TCACHE_SHIFT 8 +#define MALLOCX_ARENA_MASK \ + (((1 << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT) +/* NB: Arena index bias decreases the maximum number of arenas by 1. */ +#define MALLOCX_ARENA_LIMIT ((1 << MALLOCX_ARENA_BITS) - 1) +#define MALLOCX_TCACHE_MASK \ + (((1 << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT) +#define MALLOCX_TCACHE_MAX ((1 << MALLOCX_TCACHE_BITS) - 3) +#define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1) +/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ +#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ + (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) +#define MALLOCX_ALIGN_GET(flags) \ + (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) +#define MALLOCX_ZERO_GET(flags) \ + ((bool)(flags & MALLOCX_ZERO)) + +#define MALLOCX_TCACHE_GET(flags) \ + (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> MALLOCX_TCACHE_SHIFT)) - 2) +#define MALLOCX_ARENA_GET(flags) \ + (((unsigned)(((unsigned)flags) >> MALLOCX_ARENA_SHIFT)) - 1) + +/* Smallest size class to support. */ +#define TINY_MIN (1U << LG_TINY_MIN) + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +#ifndef LG_QUANTUM +# if (defined(__i386__) || defined(_M_IX86)) +# define LG_QUANTUM 4 +# endif +# ifdef __ia64__ +# define LG_QUANTUM 4 +# endif +# ifdef __alpha__ +# define LG_QUANTUM 4 +# endif +# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) +# define LG_QUANTUM 4 +# endif +# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) +# define LG_QUANTUM 4 +# endif +# ifdef __arm__ +# define LG_QUANTUM 3 +# endif +# ifdef __aarch64__ +# define LG_QUANTUM 4 +# endif +# ifdef __hppa__ +# define LG_QUANTUM 4 +# endif +# ifdef __mips__ +# define LG_QUANTUM 3 +# endif +# ifdef __or1k__ +# define LG_QUANTUM 3 +# endif +# ifdef __powerpc__ +# define LG_QUANTUM 4 +# endif +# ifdef __riscv__ +# define LG_QUANTUM 4 +# endif +# ifdef __s390__ +# define LG_QUANTUM 4 +# endif +# ifdef __SH4__ +# define LG_QUANTUM 4 +# endif +# ifdef __tile__ +# define LG_QUANTUM 4 +# endif +# ifdef __le32__ +# define LG_QUANTUM 4 +# endif +# ifndef LG_QUANTUM +# error "Unknown minimum alignment for architecture; specify via " + "--with-lg-quantum" +# endif +#endif + +#define QUANTUM ((size_t)(1U << LG_QUANTUM)) +#define QUANTUM_MASK (QUANTUM - 1) + +/* Return the smallest quantum multiple that is >= a. */ +#define QUANTUM_CEILING(a) \ + (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) + +#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) +#define LONG_MASK (LONG - 1) + +/* Return the smallest long multiple that is >= a. */ +#define LONG_CEILING(a) \ + (((a) + LONG_MASK) & ~LONG_MASK) + +#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) +#define PTR_MASK (SIZEOF_PTR - 1) + +/* Return the smallest (void *) multiple that is >= a. */ +#define PTR_CEILING(a) \ + (((a) + PTR_MASK) & ~PTR_MASK) + +/* + * Maximum size of L1 cache line. This is used to avoid cache line aliasing. + * In addition, this controls the spacing of cacheline-spaced size classes. + * + * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can + * only handle raw constants. + */ +#define LG_CACHELINE 6 +#define CACHELINE 64 +#define CACHELINE_MASK (CACHELINE - 1) + +/* Return the smallest cacheline multiple that is >= s. */ +#define CACHELINE_CEILING(s) \ + (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) + +/* Return the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2BASE(a, alignment) \ + ((void *)((uintptr_t)(a) & ((~(alignment)) + 1))) + +/* Return the offset between a and the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ + ((size_t)((uintptr_t)(a) & (alignment - 1))) + +/* Return the smallest alignment multiple that is >= s. */ +#define ALIGNMENT_CEILING(s, alignment) \ + (((s) + (alignment - 1)) & ((~(alignment)) + 1)) + +/* Declare a variable-length array. */ +#if __STDC_VERSION__ < 199901L +# ifdef _MSC_VER +# include <malloc.h> +# define alloca _alloca +# else +# ifdef JEMALLOC_HAS_ALLOCA_H +# include <alloca.h> +# else +# include <stdlib.h> +# endif +# endif +# define VARIABLE_ARRAY(type, name, count) \ + type *name = alloca(sizeof(type) * (count)) +#else +# define VARIABLE_ARRAY(type, name, count) type name[(count)] +#endif + +#endif /* JEMALLOC_INTERNAL_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in new file mode 100644 index 0000000..18539a0 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -0,0 +1,179 @@ +#ifndef JEMALLOC_PREAMBLE_H +#define JEMALLOC_PREAMBLE_H + +#include "jemalloc_internal_defs.h" +#include "jemalloc/internal/jemalloc_internal_decls.h" + +#ifdef JEMALLOC_UTRACE +#include <sys/ktrace.h> +#endif + +#define JEMALLOC_NO_DEMANGLE +#ifdef JEMALLOC_JET +# undef JEMALLOC_IS_MALLOC +# define JEMALLOC_N(n) jet_##n +# include "jemalloc/internal/public_namespace.h" +# define JEMALLOC_NO_RENAME +# include "../jemalloc@install_suffix@.h" +# undef JEMALLOC_NO_RENAME +#else +# define JEMALLOC_N(n) @private_namespace@##n +# include "../jemalloc@install_suffix@.h" +#endif + +#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#include <libkern/OSAtomic.h> +#endif + +#ifdef JEMALLOC_ZONE +#include <mach/mach_error.h> +#include <mach/mach_init.h> +#include <mach/vm_map.h> +#endif + +#include "jemalloc/internal/jemalloc_internal_macros.h" + +/* + * Note that the ordering matters here; the hook itself is name-mangled. We + * want the inclusion of hooks to happen early, so that we hook as much as + * possible. + */ +#ifndef JEMALLOC_NO_PRIVATE_NAMESPACE +# ifndef JEMALLOC_JET +# include "jemalloc/internal/private_namespace.h" +# else +# include "jemalloc/internal/private_namespace_jet.h" +# endif +#endif +#include "jemalloc/internal/hooks.h" + +static const bool config_debug = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; +static const bool have_dss = +#ifdef JEMALLOC_DSS + true +#else + false +#endif + ; +static const bool config_fill = +#ifdef JEMALLOC_FILL + true +#else + false +#endif + ; +static const bool config_lazy_lock = +#ifdef JEMALLOC_LAZY_LOCK + true +#else + false +#endif + ; +static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF; +static const bool config_prof = +#ifdef JEMALLOC_PROF + true +#else + false +#endif + ; +static const bool config_prof_libgcc = +#ifdef JEMALLOC_PROF_LIBGCC + true +#else + false +#endif + ; +static const bool config_prof_libunwind = +#ifdef JEMALLOC_PROF_LIBUNWIND + true +#else + false +#endif + ; +static const bool maps_coalesce = +#ifdef JEMALLOC_MAPS_COALESCE + true +#else + false +#endif + ; +static const bool config_stats = +#ifdef JEMALLOC_STATS + true +#else + false +#endif + ; +static const bool config_thp = +#ifdef JEMALLOC_THP + true +#else + false +#endif + ; +static const bool config_tls = +#ifdef JEMALLOC_TLS + true +#else + false +#endif + ; +static const bool config_utrace = +#ifdef JEMALLOC_UTRACE + true +#else + false +#endif + ; +static const bool config_xmalloc = +#ifdef JEMALLOC_XMALLOC + true +#else + false +#endif + ; +static const bool config_cache_oblivious = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + true +#else + false +#endif + ; +#ifdef JEMALLOC_HAVE_SCHED_GETCPU +/* Currently percpu_arena depends on sched_getcpu. */ +#define JEMALLOC_PERCPU_ARENA +#endif +static const bool have_percpu_arena = +#ifdef JEMALLOC_PERCPU_ARENA + true +#else + false +#endif + ; +/* + * Undocumented, and not recommended; the application should take full + * responsibility for tracking provenance. + */ +static const bool force_ivsalloc = +#ifdef JEMALLOC_FORCE_IVSALLOC + true +#else + false +#endif + ; +static const bool have_background_thread = +#ifdef JEMALLOC_BACKGROUND_THREAD + true +#else + false +#endif + ; + +#endif /* JEMALLOC_PREAMBLE_H */ diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h new file mode 100644 index 0000000..3f36282 --- /dev/null +++ b/include/jemalloc/internal/large_externs.h @@ -0,0 +1,26 @@ +#ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H +#define JEMALLOC_INTERNAL_LARGE_EXTERNS_H + +void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); +void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, + bool zero); +bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, + size_t usize_max, bool zero); +void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, + size_t alignment, bool zero, tcache_t *tcache); + +typedef void (large_dalloc_junk_t)(void *, size_t); +extern large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk; + +typedef void (large_dalloc_maybe_junk_t)(void *, size_t); +extern large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk; + +void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent); +void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); +void large_dalloc(tsdn_t *tsdn, extent_t *extent); +size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); +prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); +void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); +void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); + +#endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h new file mode 100644 index 0000000..47ae58e --- /dev/null +++ b/include/jemalloc/internal/malloc_io.h @@ -0,0 +1,62 @@ +#ifndef JEMALLOC_INTERNAL_MALLOC_IO_H +#define JEMALLOC_INTERNAL_MALLOC_IO_H + +#ifdef _WIN32 +# ifdef _WIN64 +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "ll" +# else +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "" +# endif +# define FMTd32 "d" +# define FMTu32 "u" +# define FMTx32 "x" +# define FMTd64 FMT64_PREFIX "d" +# define FMTu64 FMT64_PREFIX "u" +# define FMTx64 FMT64_PREFIX "x" +# define FMTdPTR FMTPTR_PREFIX "d" +# define FMTuPTR FMTPTR_PREFIX "u" +# define FMTxPTR FMTPTR_PREFIX "x" +#else +# include <inttypes.h> +# define FMTd32 PRId32 +# define FMTu32 PRIu32 +# define FMTx32 PRIx32 +# define FMTd64 PRId64 +# define FMTu64 PRIu64 +# define FMTx64 PRIx64 +# define FMTdPTR PRIdPTR +# define FMTuPTR PRIuPTR +# define FMTxPTR PRIxPTR +#endif + +/* Size of stack-allocated buffer passed to buferror(). */ +#define BUFERROR_BUF 64 + +/* + * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be + * large enough for all possible uses within jemalloc. + */ +#define MALLOC_PRINTF_BUFSIZE 4096 + +int buferror(int err, char *buf, size_t buflen); +uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr, + int base); +void malloc_write(const char *s); + +/* + * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating + * point math. + */ +size_t malloc_vsnprintf(char *str, size_t size, const char *format, + va_list ap); +size_t malloc_snprintf(char *str, size_t size, const char *format, ...) + JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap); +void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); + +#endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h deleted file mode 100644 index e58da5c..0000000 --- a/include/jemalloc/internal/mb.h +++ /dev/null @@ -1,115 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void mb_write(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_)) -#ifdef __i386__ -/* - * According to the Intel Architecture Software Developer's Manual, current - * processors execute instructions in order from the perspective of other - * processors in a multiprocessor system, but 1) Intel reserves the right to - * change that, and 2) the compiler's optimizer could re-order instructions if - * there weren't some form of barrier. Therefore, even if running on an - * architecture that does not need memory barriers (everything through at least - * i686), an "optimizer barrier" is necessary. - */ -JEMALLOC_INLINE void -mb_write(void) -{ - -# if 0 - /* This is a true memory barrier. */ - asm volatile ("pusha;" - "xor %%eax,%%eax;" - "cpuid;" - "popa;" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -# else - /* - * This is hopefully enough to keep the compiler from reordering - * instructions around this one. - */ - asm volatile ("nop;" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -# endif -} -#elif (defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("sfence" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -#elif defined(__powerpc__) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("eieio" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -#elif defined(__sparc__) && defined(__arch64__) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("membar #StoreStore" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -#elif defined(__tile__) -JEMALLOC_INLINE void -mb_write(void) -{ - - __sync_synchronize(); -} -#else -/* - * This is much slower than a simple memory barrier, but the semantics of mutex - * unlock make this work. - */ -JEMALLOC_INLINE void -mb_write(void) -{ - malloc_mutex_t mtx; - - malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT); - malloc_mutex_lock(TSDN_NULL, &mtx); - malloc_mutex_unlock(TSDN_NULL, &mtx); -} -#endif -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 2b4b1c3..6520c25 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -1,59 +1,123 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_MUTEX_H +#define JEMALLOC_INTERNAL_MUTEX_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/witness.h" + +typedef enum { + /* Can only acquire one mutex of a given witness rank at a time. */ + malloc_mutex_rank_exclusive, + /* + * Can acquire multiple mutexes of the same witness rank, but in + * address-ascending order only. + */ + malloc_mutex_address_ordered +} malloc_mutex_lock_order_t; typedef struct malloc_mutex_s malloc_mutex_t; - +struct malloc_mutex_s { + union { + struct { + /* + * prof_data is defined first to reduce cacheline + * bouncing: the data is not touched by the mutex holder + * during unlocking, while might be modified by + * contenders. Having it before the mutex itself could + * avoid prefetching a modified cacheline (for the + * unlocking thread). + */ + mutex_prof_data_t prof_data; #ifdef _WIN32 -# define MALLOC_MUTEX_INITIALIZER +# if _WIN32_WINNT >= 0x0600 + SRWLOCK lock; +# else + CRITICAL_SECTION lock; +# endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) -# define MALLOC_MUTEX_INITIALIZER \ - {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} + os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} + OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) -# define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_MUTEX_INITIALIZER, NULL, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} + pthread_mutex_t lock; + malloc_mutex_t *postponed_next; #else -# if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ - defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) -# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP -# define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, \ - WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} -# else -# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT -# define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_MUTEX_INITIALIZER, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} -# endif + pthread_mutex_t lock; #endif + }; + /* + * We only touch witness when configured w/ debug. However we + * keep the field in a union when !debug so that we don't have + * to pollute the code base with #ifdefs, while avoid paying the + * memory cost. + */ +#if !defined(JEMALLOC_DEBUG) + witness_t witness; + malloc_mutex_lock_order_t lock_order; +#endif + }; -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS +#if defined(JEMALLOC_DEBUG) + witness_t witness; + malloc_mutex_lock_order_t lock_order; +#endif +}; + +/* + * Based on benchmark results, a fixed spin with this amount of retries works + * well for our critical sections. + */ +#define MALLOC_MUTEX_MAX_SPIN 250 -struct malloc_mutex_s { #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 - SRWLOCK lock; +# define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) ReleaseSRWLockExclusive(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!TryAcquireSRWLockExclusive(&(m)->lock)) # else - CRITICAL_SECTION lock; +# define MALLOC_MUTEX_LOCK(m) EnterCriticalSection(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) LeaveCriticalSection(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!TryEnterCriticalSection(&(m)->lock)) # endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock lock; +# define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock)) #elif (defined(JEMALLOC_OSSPIN)) - OSSpinLock lock; -#elif (defined(JEMALLOC_MUTEX_INIT_CB)) - pthread_mutex_t lock; - malloc_mutex_t *postponed_next; +# define MALLOC_MUTEX_LOCK(m) OSSpinLockLock(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) OSSpinLockUnlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!OSSpinLockTry(&(m)->lock)) #else - pthread_mutex_t lock; +# define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0) #endif - witness_t witness; -}; -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS +#define LOCK_PROF_DATA_INITIALIZER \ + {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, \ + ATOMIC_INIT(0), 0, NULL, 0} + +#ifdef _WIN32 +# define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#elif (defined(JEMALLOC_OSSPIN)) +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, 0}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#else +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#endif #ifdef JEMALLOC_LAZY_LOCK extern bool isthreaded; @@ -62,84 +126,123 @@ extern bool isthreaded; # define isthreaded true #endif -bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, - witness_rank_t rank); -void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); -bool malloc_mutex_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); -#endif +bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, + witness_rank_t rank, malloc_mutex_lock_order_t lock_order); +void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); +bool malloc_mutex_boot(void); +void malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex); -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) -JEMALLOC_INLINE void -malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ +void malloc_mutex_lock_slow(malloc_mutex_t *mutex); - witness_assert_not_owner(tsdn, &mutex->witness); +static inline void +malloc_mutex_lock_final(malloc_mutex_t *mutex) { + MALLOC_MUTEX_LOCK(mutex); +} + +static inline bool +malloc_mutex_trylock_final(malloc_mutex_t *mutex) { + return MALLOC_MUTEX_TRYLOCK(mutex); +} + +static inline void +mutex_owner_stats_update(tsdn_t *tsdn, malloc_mutex_t *mutex) { + if (config_stats) { + mutex_prof_data_t *data = &mutex->prof_data; + data->n_lock_ops++; + if (data->prev_owner != tsdn) { + data->prev_owner = tsdn; + data->n_owner_switches++; + } + } +} + +/* Trylock: return false if the lock is successfully acquired. */ +static inline bool +malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { -#ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - AcquireSRWLockExclusive(&mutex->lock); -# else - EnterCriticalSection(&mutex->lock); -# endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock_lock(&mutex->lock); -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLockLock(&mutex->lock); -#else - pthread_mutex_lock(&mutex->lock); -#endif + if (malloc_mutex_trylock_final(mutex)) { + return true; + } + mutex_owner_stats_update(tsdn, mutex); } - witness_lock(tsdn, &mutex->witness); + witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); + + return false; } -JEMALLOC_INLINE void -malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ +/* Aggregate lock prof data. */ +static inline void +malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) { + nstime_add(&sum->tot_wait_time, &data->tot_wait_time); + if (nstime_compare(&sum->max_wait_time, &data->max_wait_time) < 0) { + nstime_copy(&sum->max_wait_time, &data->max_wait_time); + } + + sum->n_wait_times += data->n_wait_times; + sum->n_spin_acquired += data->n_spin_acquired; - witness_unlock(tsdn, &mutex->witness); + if (sum->max_n_thds < data->max_n_thds) { + sum->max_n_thds = data->max_n_thds; + } + uint32_t cur_n_waiting_thds = atomic_load_u32(&sum->n_waiting_thds, + ATOMIC_RELAXED); + uint32_t new_n_waiting_thds = cur_n_waiting_thds + atomic_load_u32( + &data->n_waiting_thds, ATOMIC_RELAXED); + atomic_store_u32(&sum->n_waiting_thds, new_n_waiting_thds, + ATOMIC_RELAXED); + sum->n_owner_switches += data->n_owner_switches; + sum->n_lock_ops += data->n_lock_ops; +} + +static inline void +malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { -#ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - ReleaseSRWLockExclusive(&mutex->lock); -# else - LeaveCriticalSection(&mutex->lock); -# endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock_unlock(&mutex->lock); -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLockUnlock(&mutex->lock); -#else - pthread_mutex_unlock(&mutex->lock); -#endif + if (malloc_mutex_trylock_final(mutex)) { + malloc_mutex_lock_slow(mutex); + } + mutex_owner_stats_update(tsdn, mutex); } + witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); } -JEMALLOC_INLINE void -malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ +static inline void +malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); + if (isthreaded) { + MALLOC_MUTEX_UNLOCK(mutex); + } +} - witness_assert_owner(tsdn, &mutex->witness); +static inline void +malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); } -JEMALLOC_INLINE void -malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ +static inline void +malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); +} - witness_assert_not_owner(tsdn, &mutex->witness); +/* Copy the prof data from mutex for processing. */ +static inline void +malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, + malloc_mutex_t *mutex) { + mutex_prof_data_t *source = &mutex->prof_data; + /* Can only read holding the mutex. */ + malloc_mutex_assert_owner(tsdn, mutex); + + /* + * Not *really* allowed (we shouldn't be doing non-atomic loads of + * atomic data), but the mutex protection makes this safe, and writing + * a member-for-member copy is tedious for this situation. + */ + *data = *source; + /* n_wait_thds is not reported (modified w/o locking). */ + atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED); } -#endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_MUTEX_H */ diff --git a/include/jemalloc/internal/mutex_pool.h b/include/jemalloc/internal/mutex_pool.h new file mode 100644 index 0000000..726cece --- /dev/null +++ b/include/jemalloc/internal/mutex_pool.h @@ -0,0 +1,94 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_POOL_H +#define JEMALLOC_INTERNAL_MUTEX_POOL_H + +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/witness.h" + +/* We do mod reductions by this value, so it should be kept a power of 2. */ +#define MUTEX_POOL_SIZE 256 + +typedef struct mutex_pool_s mutex_pool_t; +struct mutex_pool_s { + malloc_mutex_t mutexes[MUTEX_POOL_SIZE]; +}; + +bool mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank); + +/* Internal helper - not meant to be called outside this module. */ +static inline malloc_mutex_t * +mutex_pool_mutex(mutex_pool_t *pool, uintptr_t key) { + size_t hash_result[2]; + hash(&key, sizeof(key), 0xd50dcc1b, hash_result); + return &pool->mutexes[hash_result[0] % MUTEX_POOL_SIZE]; +} + +static inline void +mutex_pool_assert_not_held(tsdn_t *tsdn, mutex_pool_t *pool) { + for (int i = 0; i < MUTEX_POOL_SIZE; i++) { + malloc_mutex_assert_not_owner(tsdn, &pool->mutexes[i]); + } +} + +/* + * Note that a mutex pool doesn't work exactly the way an embdedded mutex would. + * You're not allowed to acquire mutexes in the pool one at a time. You have to + * acquire all the mutexes you'll need in a single function call, and then + * release them all in a single function call. + */ + +static inline void +mutex_pool_lock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { + mutex_pool_assert_not_held(tsdn, pool); + + malloc_mutex_t *mutex = mutex_pool_mutex(pool, key); + malloc_mutex_lock(tsdn, mutex); +} + +static inline void +mutex_pool_unlock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { + malloc_mutex_t *mutex = mutex_pool_mutex(pool, key); + malloc_mutex_unlock(tsdn, mutex); + + mutex_pool_assert_not_held(tsdn, pool); +} + +static inline void +mutex_pool_lock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1, + uintptr_t key2) { + mutex_pool_assert_not_held(tsdn, pool); + + malloc_mutex_t *mutex1 = mutex_pool_mutex(pool, key1); + malloc_mutex_t *mutex2 = mutex_pool_mutex(pool, key2); + if ((uintptr_t)mutex1 < (uintptr_t)mutex2) { + malloc_mutex_lock(tsdn, mutex1); + malloc_mutex_lock(tsdn, mutex2); + } else if ((uintptr_t)mutex1 == (uintptr_t)mutex2) { + malloc_mutex_lock(tsdn, mutex1); + } else { + malloc_mutex_lock(tsdn, mutex2); + malloc_mutex_lock(tsdn, mutex1); + } +} + +static inline void +mutex_pool_unlock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1, + uintptr_t key2) { + malloc_mutex_t *mutex1 = mutex_pool_mutex(pool, key1); + malloc_mutex_t *mutex2 = mutex_pool_mutex(pool, key2); + if (mutex1 == mutex2) { + malloc_mutex_unlock(tsdn, mutex1); + } else { + malloc_mutex_unlock(tsdn, mutex1); + malloc_mutex_unlock(tsdn, mutex2); + } + + mutex_pool_assert_not_held(tsdn, pool); +} + +static inline void +mutex_pool_assert_owner(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { + malloc_mutex_assert_owner(tsdn, mutex_pool_mutex(pool, key)); +} + +#endif /* JEMALLOC_INTERNAL_MUTEX_POOL_H */ diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h new file mode 100644 index 0000000..3358bcf --- /dev/null +++ b/include/jemalloc/internal/mutex_prof.h @@ -0,0 +1,86 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_PROF_H +#define JEMALLOC_INTERNAL_MUTEX_PROF_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/tsd_types.h" + +#define MUTEX_PROF_GLOBAL_MUTEXES \ + OP(background_thread) \ + OP(ctl) \ + OP(prof) + +typedef enum { +#define OP(mtx) global_prof_mutex_##mtx, + MUTEX_PROF_GLOBAL_MUTEXES +#undef OP + mutex_prof_num_global_mutexes +} mutex_prof_global_ind_t; + +#define MUTEX_PROF_ARENA_MUTEXES \ + OP(large) \ + OP(extent_avail) \ + OP(extents_dirty) \ + OP(extents_muzzy) \ + OP(extents_retained) \ + OP(decay_dirty) \ + OP(decay_muzzy) \ + OP(base) \ + OP(tcache_list) + +typedef enum { +#define OP(mtx) arena_prof_mutex_##mtx, + MUTEX_PROF_ARENA_MUTEXES +#undef OP + mutex_prof_num_arena_mutexes +} mutex_prof_arena_ind_t; + +#define MUTEX_PROF_COUNTERS \ + OP(num_ops, uint64_t) \ + OP(num_wait, uint64_t) \ + OP(num_spin_acq, uint64_t) \ + OP(num_owner_switch, uint64_t) \ + OP(total_wait_time, uint64_t) \ + OP(max_wait_time, uint64_t) \ + OP(max_num_thds, uint32_t) + +typedef enum { +#define OP(counter, type) mutex_counter_##counter, + MUTEX_PROF_COUNTERS +#undef OP + mutex_prof_num_counters +} mutex_prof_counter_ind_t; + +typedef struct { + /* + * Counters touched on the slow path, i.e. when there is lock + * contention. We update them once we have the lock. + */ + /* Total time (in nano seconds) spent waiting on this mutex. */ + nstime_t tot_wait_time; + /* Max time (in nano seconds) spent on a single lock operation. */ + nstime_t max_wait_time; + /* # of times have to wait for this mutex (after spinning). */ + uint64_t n_wait_times; + /* # of times acquired the mutex through local spinning. */ + uint64_t n_spin_acquired; + /* Max # of threads waiting for the mutex at the same time. */ + uint32_t max_n_thds; + /* Current # of threads waiting on the lock. Atomic synced. */ + atomic_u32_t n_waiting_thds; + + /* + * Data touched on the fast path. These are modified right after we + * grab the lock, so it's placed closest to the end (i.e. right before + * the lock) so that we have a higher chance of them being on the same + * cacheline. + */ + /* # of times the mutex holder is different than the previous one. */ + uint64_t n_owner_switches; + /* Previous mutex holder, to facilitate n_owner_switches. */ + tsdn_t *prev_owner; + /* # of lock() operations in total. */ + uint64_t n_lock_ops; +} mutex_prof_data_t; + +#endif /* JEMALLOC_INTERNAL_MUTEX_PROF_H */ diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index 93b27dc..17c177c 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -1,48 +1,34 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct nstime_s nstime_t; +#ifndef JEMALLOC_INTERNAL_NSTIME_H +#define JEMALLOC_INTERNAL_NSTIME_H /* Maximum supported number of seconds (~584 years). */ -#define NSTIME_SEC_MAX KQU(18446744072) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct nstime_s { - uint64_t ns; -}; +#define NSTIME_SEC_MAX KQU(18446744072) +#define NSTIME_ZERO_INITIALIZER {0} + +typedef struct { + uint64_t ns; +} nstime_t; + +void nstime_init(nstime_t *time, uint64_t ns); +void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); +uint64_t nstime_ns(const nstime_t *time); +uint64_t nstime_sec(const nstime_t *time); +uint64_t nstime_msec(const nstime_t *time); +uint64_t nstime_nsec(const nstime_t *time); +void nstime_copy(nstime_t *time, const nstime_t *source); +int nstime_compare(const nstime_t *a, const nstime_t *b); +void nstime_add(nstime_t *time, const nstime_t *addend); +void nstime_iadd(nstime_t *time, uint64_t addend); +void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); +void nstime_isubtract(nstime_t *time, uint64_t subtrahend); +void nstime_imultiply(nstime_t *time, uint64_t multiplier); +void nstime_idivide(nstime_t *time, uint64_t divisor); +uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void nstime_init(nstime_t *time, uint64_t ns); -void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); -uint64_t nstime_ns(const nstime_t *time); -uint64_t nstime_sec(const nstime_t *time); -uint64_t nstime_nsec(const nstime_t *time); -void nstime_copy(nstime_t *time, const nstime_t *source); -int nstime_compare(const nstime_t *a, const nstime_t *b); -void nstime_add(nstime_t *time, const nstime_t *addend); -void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); -void nstime_imultiply(nstime_t *time, uint64_t multiplier); -void nstime_idivide(nstime_t *time, uint64_t divisor); -uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); -#ifdef JEMALLOC_JET typedef bool (nstime_monotonic_t)(void); -extern nstime_monotonic_t *nstime_monotonic; -typedef bool (nstime_update_t)(nstime_t *); -extern nstime_update_t *nstime_update; -#else -bool nstime_monotonic(void); -bool nstime_update(nstime_t *time); -#endif +extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic; -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +typedef bool (nstime_update_t)(nstime_t *); +extern nstime_update_t *JET_MUTABLE nstime_update; -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_NSTIME_H */ diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 4ae9f15..28383b7 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -1,29 +1,71 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H +#define JEMALLOC_INTERNAL_PAGES_EXTERNS_H -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS +/* Page size. LG_PAGE is determined by the configure script. */ +#ifdef PAGE_MASK +# undef PAGE_MASK +#endif +#define PAGE ((size_t)(1U << LG_PAGE)) +#define PAGE_MASK ((size_t)(PAGE - 1)) +/* Return the page base address for the page containing address a. */ +#define PAGE_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~PAGE_MASK)) +/* Return the smallest pagesize multiple that is >= s. */ +#define PAGE_CEILING(s) \ + (((s) + PAGE_MASK) & ~PAGE_MASK) -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS +/* Huge page size. LG_HUGEPAGE is determined by the configure script. */ +#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) +#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) +/* Return the huge page base address for the huge page containing address a. */ +#define HUGEPAGE_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK)) +/* Return the smallest pagesize multiple that is >= s. */ +#define HUGEPAGE_CEILING(s) \ + (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK) -void *pages_map(void *addr, size_t size, bool *commit); -void pages_unmap(void *addr, size_t size); -void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, - size_t size, bool *commit); -bool pages_commit(void *addr, size_t size); -bool pages_decommit(void *addr, size_t size); -bool pages_purge(void *addr, size_t size); -bool pages_huge(void *addr, size_t size); -bool pages_nohuge(void *addr, size_t size); -void pages_boot(void); +/* PAGES_CAN_PURGE_LAZY is defined if lazy purging is supported. */ +#if defined(_WIN32) || defined(JEMALLOC_PURGE_MADVISE_FREE) +# define PAGES_CAN_PURGE_LAZY +#endif +/* + * PAGES_CAN_PURGE_FORCED is defined if forced purging is supported. + * + * The only supported way to hard-purge on Windows is to decommit and then + * re-commit, but doing so is racy, and if re-commit fails it's a pain to + * propagate the "poisoned" memory state. Since we typically decommit as the + * next step after purging on Windows anyway, there's no point in adding such + * complexity. + */ +#if !defined(_WIN32) && ((defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ + defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)) || \ + defined(JEMALLOC_MAPS_COALESCE)) +# define PAGES_CAN_PURGE_FORCED +#endif -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +static const bool pages_can_purge_lazy = +#ifdef PAGES_CAN_PURGE_LAZY + true +#else + false +#endif + ; +static const bool pages_can_purge_forced = +#ifdef PAGES_CAN_PURGE_FORCED + true +#else + false +#endif + ; -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); +void pages_unmap(void *addr, size_t size); +bool pages_commit(void *addr, size_t size); +bool pages_decommit(void *addr, size_t size); +bool pages_purge_lazy(void *addr, size_t size); +bool pages_purge_forced(void *addr, size_t size); +bool pages_huge(void *addr, size_t size); +bool pages_nohuge(void *addr, size_t size); +bool pages_boot(void); +#endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 4f91c33..84d6778 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -13,10 +13,10 @@ */ #ifndef PH_H_ -#define PH_H_ +#define PH_H_ /* Node structure. */ -#define phn(a_type) \ +#define phn(a_type) \ struct { \ a_type *phn_prev; \ a_type *phn_next; \ @@ -24,31 +24,31 @@ struct { \ } /* Root structure. */ -#define ph(a_type) \ +#define ph(a_type) \ struct { \ a_type *ph_root; \ } /* Internal utility macros. */ -#define phn_lchild_get(a_type, a_field, a_phn) \ +#define phn_lchild_get(a_type, a_field, a_phn) \ (a_phn->a_field.phn_lchild) -#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do { \ +#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do { \ a_phn->a_field.phn_lchild = a_lchild; \ } while (0) -#define phn_next_get(a_type, a_field, a_phn) \ +#define phn_next_get(a_type, a_field, a_phn) \ (a_phn->a_field.phn_next) -#define phn_prev_set(a_type, a_field, a_phn, a_prev) do { \ +#define phn_prev_set(a_type, a_field, a_phn, a_prev) do { \ a_phn->a_field.phn_prev = a_prev; \ } while (0) -#define phn_prev_get(a_type, a_field, a_phn) \ +#define phn_prev_get(a_type, a_field, a_phn) \ (a_phn->a_field.phn_prev) -#define phn_next_set(a_type, a_field, a_phn, a_next) do { \ +#define phn_next_set(a_type, a_field, a_phn, a_next) do { \ a_phn->a_field.phn_next = a_next; \ } while (0) -#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do { \ +#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do { \ a_type *phn0child; \ \ assert(a_phn0 != NULL); \ @@ -58,17 +58,18 @@ struct { \ phn_prev_set(a_type, a_field, a_phn1, a_phn0); \ phn0child = phn_lchild_get(a_type, a_field, a_phn0); \ phn_next_set(a_type, a_field, a_phn1, phn0child); \ - if (phn0child != NULL) \ + if (phn0child != NULL) { \ phn_prev_set(a_type, a_field, phn0child, a_phn1); \ + } \ phn_lchild_set(a_type, a_field, a_phn0, a_phn1); \ } while (0) -#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \ - if (a_phn0 == NULL) \ +#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \ + if (a_phn0 == NULL) { \ r_phn = a_phn1; \ - else if (a_phn1 == NULL) \ + } else if (a_phn1 == NULL) { \ r_phn = a_phn0; \ - else if (a_cmp(a_phn0, a_phn1) < 0) { \ + } else if (a_cmp(a_phn0, a_phn1) < 0) { \ phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, \ a_cmp); \ r_phn = a_phn0; \ @@ -79,7 +80,7 @@ struct { \ } \ } while (0) -#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do { \ +#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do { \ a_type *head = NULL; \ a_type *tail = NULL; \ a_type *phn0 = a_phn; \ @@ -95,8 +96,9 @@ struct { \ */ \ if (phn1 != NULL) { \ a_type *phnrest = phn_next_get(a_type, a_field, phn1); \ - if (phnrest != NULL) \ + if (phnrest != NULL) { \ phn_prev_set(a_type, a_field, phnrest, NULL); \ + } \ phn_prev_set(a_type, a_field, phn0, NULL); \ phn_next_set(a_type, a_field, phn0, NULL); \ phn_prev_set(a_type, a_field, phn1, NULL); \ @@ -150,8 +152,9 @@ struct { \ NULL); \ phn_merge(a_type, a_field, phn0, phn1, \ a_cmp, phn0); \ - if (head == NULL) \ + if (head == NULL) { \ break; \ + } \ phn_next_set(a_type, a_field, tail, \ phn0); \ tail = phn0; \ @@ -164,7 +167,7 @@ struct { \ r_phn = phn0; \ } while (0) -#define ph_merge_aux(a_type, a_field, a_ph, a_cmp) do { \ +#define ph_merge_aux(a_type, a_field, a_ph, a_cmp) do { \ a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root); \ if (phn != NULL) { \ phn_prev_set(a_type, a_field, a_ph->ph_root, NULL); \ @@ -177,11 +180,11 @@ struct { \ } \ } while (0) -#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \ +#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \ a_type *lchild = phn_lchild_get(a_type, a_field, a_phn); \ - if (lchild == NULL) \ + if (lchild == NULL) { \ r_phn = NULL; \ - else { \ + } else { \ ph_merge_siblings(a_type, a_field, lchild, a_cmp, \ r_phn); \ } \ @@ -191,44 +194,50 @@ struct { \ * The ph_proto() macro generates function prototypes that correspond to the * functions generated by an equivalently parameterized call to ph_gen(). */ -#define ph_proto(a_attr, a_prefix, a_ph_type, a_type) \ +#define ph_proto(a_attr, a_prefix, a_ph_type, a_type) \ a_attr void a_prefix##new(a_ph_type *ph); \ a_attr bool a_prefix##empty(a_ph_type *ph); \ a_attr a_type *a_prefix##first(a_ph_type *ph); \ +a_attr a_type *a_prefix##any(a_ph_type *ph); \ a_attr void a_prefix##insert(a_ph_type *ph, a_type *phn); \ a_attr a_type *a_prefix##remove_first(a_ph_type *ph); \ +a_attr a_type *a_prefix##remove_any(a_ph_type *ph); \ a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn); /* * The ph_gen() macro generates a type-specific pairing heap implementation, * based on the above cpp macros. */ -#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \ +#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \ a_attr void \ -a_prefix##new(a_ph_type *ph) \ -{ \ - \ +a_prefix##new(a_ph_type *ph) { \ memset(ph, 0, sizeof(ph(a_type))); \ } \ a_attr bool \ -a_prefix##empty(a_ph_type *ph) \ -{ \ - \ +a_prefix##empty(a_ph_type *ph) { \ return (ph->ph_root == NULL); \ } \ a_attr a_type * \ -a_prefix##first(a_ph_type *ph) \ -{ \ - \ - if (ph->ph_root == NULL) \ - return (NULL); \ +a_prefix##first(a_ph_type *ph) { \ + if (ph->ph_root == NULL) { \ + return NULL; \ + } \ ph_merge_aux(a_type, a_field, ph, a_cmp); \ - return (ph->ph_root); \ + return ph->ph_root; \ +} \ +a_attr a_type * \ +a_prefix##any(a_ph_type *ph) { \ + if (ph->ph_root == NULL) { \ + return NULL; \ + } \ + a_type *aux = phn_next_get(a_type, a_field, ph->ph_root); \ + if (aux != NULL) { \ + return aux; \ + } \ + return ph->ph_root; \ } \ a_attr void \ -a_prefix##insert(a_ph_type *ph, a_type *phn) \ -{ \ - \ +a_prefix##insert(a_ph_type *ph, a_type *phn) { \ memset(&phn->a_field, 0, sizeof(phn(a_type))); \ \ /* \ @@ -239,9 +248,9 @@ a_prefix##insert(a_ph_type *ph, a_type *phn) \ * constant-time, whereas eager merging would make insert \ * O(log n). \ */ \ - if (ph->ph_root == NULL) \ + if (ph->ph_root == NULL) { \ ph->ph_root = phn; \ - else { \ + } else { \ phn_next_set(a_type, a_field, phn, phn_next_get(a_type, \ a_field, ph->ph_root)); \ if (phn_next_get(a_type, a_field, ph->ph_root) != \ @@ -255,12 +264,12 @@ a_prefix##insert(a_ph_type *ph, a_type *phn) \ } \ } \ a_attr a_type * \ -a_prefix##remove_first(a_ph_type *ph) \ -{ \ +a_prefix##remove_first(a_ph_type *ph) { \ a_type *ret; \ \ - if (ph->ph_root == NULL) \ - return (NULL); \ + if (ph->ph_root == NULL) { \ + return NULL; \ + } \ ph_merge_aux(a_type, a_field, ph, a_cmp); \ \ ret = ph->ph_root; \ @@ -268,18 +277,54 @@ a_prefix##remove_first(a_ph_type *ph) \ ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ ph->ph_root); \ \ - return (ret); \ + return ret; \ +} \ +a_attr a_type * \ +a_prefix##remove_any(a_ph_type *ph) { \ + /* \ + * Remove the most recently inserted aux list element, or the \ + * root if the aux list is empty. This has the effect of \ + * behaving as a LIFO (and insertion/removal is therefore \ + * constant-time) if a_prefix##[remove_]first() are never \ + * called. \ + */ \ + if (ph->ph_root == NULL) { \ + return NULL; \ + } \ + a_type *ret = phn_next_get(a_type, a_field, ph->ph_root); \ + if (ret != NULL) { \ + a_type *aux = phn_next_get(a_type, a_field, ret); \ + phn_next_set(a_type, a_field, ph->ph_root, aux); \ + if (aux != NULL) { \ + phn_prev_set(a_type, a_field, aux, \ + ph->ph_root); \ + } \ + return ret; \ + } \ + ret = ph->ph_root; \ + ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ + ph->ph_root); \ + return ret; \ } \ a_attr void \ -a_prefix##remove(a_ph_type *ph, a_type *phn) \ -{ \ +a_prefix##remove(a_ph_type *ph, a_type *phn) { \ a_type *replace, *parent; \ \ - /* \ - * We can delete from aux list without merging it, but we need \ - * to merge if we are dealing with the root node. \ - */ \ if (ph->ph_root == phn) { \ + /* \ + * We can delete from aux list without merging it, but \ + * we need to merge if we are dealing with the root \ + * node and it has children. \ + */ \ + if (phn_lchild_get(a_type, a_field, phn) == NULL) { \ + ph->ph_root = phn_next_get(a_type, a_field, \ + phn); \ + if (ph->ph_root != NULL) { \ + phn_prev_set(a_type, a_field, \ + ph->ph_root, NULL); \ + } \ + return; \ + } \ ph_merge_aux(a_type, a_field, ph, a_cmp); \ if (ph->ph_root == phn) { \ ph_merge_children(a_type, a_field, ph->ph_root, \ @@ -290,8 +335,9 @@ a_prefix##remove(a_ph_type *ph, a_type *phn) \ \ /* Get parent (if phn is leftmost child) before mutating. */ \ if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) { \ - if (phn_lchild_get(a_type, a_field, parent) != phn) \ + if (phn_lchild_get(a_type, a_field, parent) != phn) { \ parent = NULL; \ + } \ } \ /* Find a possible replacement node, and link to parent. */ \ ph_merge_children(a_type, a_field, phn, a_cmp, replace); \ diff --git a/include/jemalloc/internal/private_namespace.sh b/include/jemalloc/internal/private_namespace.sh index cd25eb3..6ef1346 100755 --- a/include/jemalloc/internal/private_namespace.sh +++ b/include/jemalloc/internal/private_namespace.sh @@ -1,5 +1,5 @@ #!/bin/sh -for symbol in `cat $1` ; do - echo "#define ${symbol} JEMALLOC_N(${symbol})" +for symbol in `cat "$@"` ; do + echo "#define ${symbol} JEMALLOC_N(${symbol})" done diff --git a/include/jemalloc/internal/private_symbols.sh b/include/jemalloc/internal/private_symbols.sh new file mode 100755 index 0000000..442a259 --- /dev/null +++ b/include/jemalloc/internal/private_symbols.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# +# Generate private_symbols[_jet].awk. +# +# Usage: private_symbols.sh <sym_prefix> <sym>* +# +# <sym_prefix> is typically "" or "_". + +sym_prefix=$1 +shift + +cat <<EOF +#!/usr/bin/env awk -f + +BEGIN { + sym_prefix = "${sym_prefix}" + split("\\ +EOF + +for public_sym in "$@" ; do + cat <<EOF + ${sym_prefix}${public_sym} \\ +EOF +done + +cat <<"EOF" + ", exported_symbol_names) + # Store exported symbol names as keys in exported_symbols. + for (i in exported_symbol_names) { + exported_symbols[exported_symbol_names[i]] = 1 + } +} + +# Process 'nm -a <c_source.o>' output. +# +# Handle lines like: +# 0000000000000008 D opt_junk +# 0000000000007574 T malloc_initialized +(NF == 3 && $2 ~ /^[ABCDGRSTVW]$/ && !($3 in exported_symbols) && $3 ~ /^[A-Za-z0-9_]+$/) { + print substr($3, 1+length(sym_prefix), length($3)-length(sym_prefix)) +} + +# Process 'dumpbin /SYMBOLS <c_source.obj>' output. +# +# Handle lines like: +# 353 00008098 SECT4 notype External | opt_junk +# 3F1 00000000 SECT7 notype () External | malloc_initialized +($3 ~ /^SECT[0-9]+/ && $(NF-2) == "External" && !($NF in exported_symbols)) { + print $NF +} +EOF diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt deleted file mode 100644 index 60b57e5..0000000 --- a/include/jemalloc/internal/private_symbols.txt +++ /dev/null @@ -1,639 +0,0 @@ -a0dalloc -a0get -a0malloc -arena_aalloc -arena_alloc_junk_small -arena_basic_stats_merge -arena_bin_index -arena_bin_info -arena_bitselm_get_const -arena_bitselm_get_mutable -arena_boot -arena_choose -arena_choose_hard -arena_choose_impl -arena_chunk_alloc_huge -arena_chunk_cache_maybe_insert -arena_chunk_cache_maybe_remove -arena_chunk_dalloc_huge -arena_chunk_ralloc_huge_expand -arena_chunk_ralloc_huge_shrink -arena_chunk_ralloc_huge_similar -arena_cleanup -arena_dalloc -arena_dalloc_bin -arena_dalloc_bin_junked_locked -arena_dalloc_junk_large -arena_dalloc_junk_small -arena_dalloc_large -arena_dalloc_large_junked_locked -arena_dalloc_small -arena_decay_tick -arena_decay_ticks -arena_decay_time_default_get -arena_decay_time_default_set -arena_decay_time_get -arena_decay_time_set -arena_dss_prec_get -arena_dss_prec_set -arena_extent_sn_next -arena_get -arena_ichoose -arena_init -arena_lg_dirty_mult_default_get -arena_lg_dirty_mult_default_set -arena_lg_dirty_mult_get -arena_lg_dirty_mult_set -arena_malloc -arena_malloc_hard -arena_malloc_large -arena_mapbits_allocated_get -arena_mapbits_binind_get -arena_mapbits_decommitted_get -arena_mapbits_dirty_get -arena_mapbits_get -arena_mapbits_internal_set -arena_mapbits_large_binind_set -arena_mapbits_large_get -arena_mapbits_large_set -arena_mapbits_large_size_get -arena_mapbits_size_decode -arena_mapbits_size_encode -arena_mapbits_small_runind_get -arena_mapbits_small_set -arena_mapbits_unallocated_set -arena_mapbits_unallocated_size_get -arena_mapbits_unallocated_size_set -arena_mapbits_unzeroed_get -arena_mapbitsp_get_const -arena_mapbitsp_get_mutable -arena_mapbitsp_read -arena_mapbitsp_write -arena_maxrun -arena_maybe_purge -arena_metadata_allocated_add -arena_metadata_allocated_get -arena_metadata_allocated_sub -arena_migrate -arena_miscelm_get_const -arena_miscelm_get_mutable -arena_miscelm_to_pageind -arena_miscelm_to_rpages -arena_new -arena_node_alloc -arena_node_dalloc -arena_nthreads_dec -arena_nthreads_get -arena_nthreads_inc -arena_palloc -arena_postfork_child -arena_postfork_parent -arena_prefork0 -arena_prefork1 -arena_prefork2 -arena_prefork3 -arena_prof_accum -arena_prof_accum_impl -arena_prof_accum_locked -arena_prof_promoted -arena_prof_tctx_get -arena_prof_tctx_reset -arena_prof_tctx_set -arena_ptr_small_binind_get -arena_purge -arena_quarantine_junk_small -arena_ralloc -arena_ralloc_junk_large -arena_ralloc_no_move -arena_rd_to_miscelm -arena_redzone_corruption -arena_reset -arena_run_regind -arena_run_to_miscelm -arena_salloc -arena_sdalloc -arena_stats_merge -arena_tcache_fill_small -arena_tdata_get -arena_tdata_get_hard -arenas -arenas_tdata_bypass_cleanup -arenas_tdata_cleanup -atomic_add_p -atomic_add_u -atomic_add_uint32 -atomic_add_uint64 -atomic_add_z -atomic_cas_p -atomic_cas_u -atomic_cas_uint32 -atomic_cas_uint64 -atomic_cas_z -atomic_sub_p -atomic_sub_u -atomic_sub_uint32 -atomic_sub_uint64 -atomic_sub_z -atomic_write_p -atomic_write_u -atomic_write_uint32 -atomic_write_uint64 -atomic_write_z -base_alloc -base_boot -base_postfork_child -base_postfork_parent -base_prefork -base_stats_get -bitmap_full -bitmap_get -bitmap_info_init -bitmap_init -bitmap_set -bitmap_sfu -bitmap_size -bitmap_unset -bootstrap_calloc -bootstrap_free -bootstrap_malloc -bt_init -buferror -chunk_alloc_base -chunk_alloc_cache -chunk_alloc_dss -chunk_alloc_mmap -chunk_alloc_wrapper -chunk_boot -chunk_dalloc_cache -chunk_dalloc_mmap -chunk_dalloc_wrapper -chunk_deregister -chunk_dss_boot -chunk_dss_mergeable -chunk_dss_prec_get -chunk_dss_prec_set -chunk_hooks_default -chunk_hooks_get -chunk_hooks_set -chunk_in_dss -chunk_lookup -chunk_npages -chunk_purge_wrapper -chunk_register -chunks_rtree -chunksize -chunksize_mask -ckh_count -ckh_delete -ckh_insert -ckh_iter -ckh_new -ckh_pointer_hash -ckh_pointer_keycomp -ckh_remove -ckh_search -ckh_string_hash -ckh_string_keycomp -ctl_boot -ctl_bymib -ctl_byname -ctl_nametomib -ctl_postfork_child -ctl_postfork_parent -ctl_prefork -decay_ticker_get -dss_prec_names -extent_node_achunk_get -extent_node_achunk_set -extent_node_addr_get -extent_node_addr_set -extent_node_arena_get -extent_node_arena_set -extent_node_committed_get -extent_node_committed_set -extent_node_dirty_insert -extent_node_dirty_linkage_init -extent_node_dirty_remove -extent_node_init -extent_node_prof_tctx_get -extent_node_prof_tctx_set -extent_node_size_get -extent_node_size_set -extent_node_sn_get -extent_node_sn_set -extent_node_zeroed_get -extent_node_zeroed_set -extent_size_quantize_ceil -extent_size_quantize_floor -extent_tree_ad_destroy -extent_tree_ad_destroy_recurse -extent_tree_ad_empty -extent_tree_ad_first -extent_tree_ad_insert -extent_tree_ad_iter -extent_tree_ad_iter_recurse -extent_tree_ad_iter_start -extent_tree_ad_last -extent_tree_ad_new -extent_tree_ad_next -extent_tree_ad_nsearch -extent_tree_ad_prev -extent_tree_ad_psearch -extent_tree_ad_remove -extent_tree_ad_reverse_iter -extent_tree_ad_reverse_iter_recurse -extent_tree_ad_reverse_iter_start -extent_tree_ad_search -extent_tree_szsnad_destroy -extent_tree_szsnad_destroy_recurse -extent_tree_szsnad_empty -extent_tree_szsnad_first -extent_tree_szsnad_insert -extent_tree_szsnad_iter -extent_tree_szsnad_iter_recurse -extent_tree_szsnad_iter_start -extent_tree_szsnad_last -extent_tree_szsnad_new -extent_tree_szsnad_next -extent_tree_szsnad_nsearch -extent_tree_szsnad_prev -extent_tree_szsnad_psearch -extent_tree_szsnad_remove -extent_tree_szsnad_reverse_iter -extent_tree_szsnad_reverse_iter_recurse -extent_tree_szsnad_reverse_iter_start -extent_tree_szsnad_search -ffs_llu -ffs_lu -ffs_u -ffs_u32 -ffs_u64 -ffs_zu -get_errno -hash -hash_fmix_32 -hash_fmix_64 -hash_get_block_32 -hash_get_block_64 -hash_rotl_32 -hash_rotl_64 -hash_x64_128 -hash_x86_128 -hash_x86_32 -huge_aalloc -huge_dalloc -huge_dalloc_junk -huge_malloc -huge_palloc -huge_prof_tctx_get -huge_prof_tctx_reset -huge_prof_tctx_set -huge_ralloc -huge_ralloc_no_move -huge_salloc -iaalloc -ialloc -iallocztm -iarena_cleanup -idalloc -idalloctm -in_valgrind -index2size -index2size_compute -index2size_lookup -index2size_tab -ipalloc -ipalloct -ipallocztm -iqalloc -iralloc -iralloct -iralloct_realign -isalloc -isdalloct -isqalloc -isthreaded -ivsalloc -ixalloc -jemalloc_postfork_child -jemalloc_postfork_parent -jemalloc_prefork -large_maxclass -lg_floor -lg_prof_sample -malloc_cprintf -malloc_mutex_assert_not_owner -malloc_mutex_assert_owner -malloc_mutex_boot -malloc_mutex_init -malloc_mutex_lock -malloc_mutex_postfork_child -malloc_mutex_postfork_parent -malloc_mutex_prefork -malloc_mutex_unlock -malloc_printf -malloc_snprintf -malloc_strtoumax -malloc_tsd_boot0 -malloc_tsd_boot1 -malloc_tsd_cleanup_register -malloc_tsd_dalloc -malloc_tsd_malloc -malloc_tsd_no_cleanup -malloc_vcprintf -malloc_vsnprintf -malloc_write -map_bias -map_misc_offset -mb_write -narenas_auto -narenas_tdata_cleanup -narenas_total_get -ncpus -nhbins -nhclasses -nlclasses -nstime_add -nstime_compare -nstime_copy -nstime_divide -nstime_idivide -nstime_imultiply -nstime_init -nstime_init2 -nstime_monotonic -nstime_ns -nstime_nsec -nstime_sec -nstime_subtract -nstime_update -opt_abort -opt_decay_time -opt_dss -opt_junk -opt_junk_alloc -opt_junk_free -opt_lg_chunk -opt_lg_dirty_mult -opt_lg_prof_interval -opt_lg_prof_sample -opt_lg_tcache_max -opt_narenas -opt_prof -opt_prof_accum -opt_prof_active -opt_prof_final -opt_prof_gdump -opt_prof_leak -opt_prof_prefix -opt_prof_thread_active_init -opt_purge -opt_quarantine -opt_redzone -opt_stats_print -opt_tcache -opt_thp -opt_utrace -opt_xmalloc -opt_zero -p2rz -pages_boot -pages_commit -pages_decommit -pages_huge -pages_map -pages_nohuge -pages_purge -pages_trim -pages_unmap -pind2sz -pind2sz_compute -pind2sz_lookup -pind2sz_tab -pow2_ceil_u32 -pow2_ceil_u64 -pow2_ceil_zu -prng_lg_range_u32 -prng_lg_range_u64 -prng_lg_range_zu -prng_range_u32 -prng_range_u64 -prng_range_zu -prng_state_next_u32 -prng_state_next_u64 -prng_state_next_zu -prof_active -prof_active_get -prof_active_get_unlocked -prof_active_set -prof_alloc_prep -prof_alloc_rollback -prof_backtrace -prof_boot0 -prof_boot1 -prof_boot2 -prof_bt_count -prof_dump_header -prof_dump_open -prof_free -prof_free_sampled_object -prof_gdump -prof_gdump_get -prof_gdump_get_unlocked -prof_gdump_set -prof_gdump_val -prof_idump -prof_interval -prof_lookup -prof_malloc -prof_malloc_sample_object -prof_mdump -prof_postfork_child -prof_postfork_parent -prof_prefork0 -prof_prefork1 -prof_realloc -prof_reset -prof_sample_accum_update -prof_sample_threshold_update -prof_tctx_get -prof_tctx_reset -prof_tctx_set -prof_tdata_cleanup -prof_tdata_count -prof_tdata_get -prof_tdata_init -prof_tdata_reinit -prof_thread_active_get -prof_thread_active_init_get -prof_thread_active_init_set -prof_thread_active_set -prof_thread_name_get -prof_thread_name_set -psz2ind -psz2u -purge_mode_names -quarantine -quarantine_alloc_hook -quarantine_alloc_hook_work -quarantine_cleanup -rtree_child_read -rtree_child_read_hard -rtree_child_tryread -rtree_delete -rtree_get -rtree_new -rtree_node_valid -rtree_set -rtree_start_level -rtree_subkey -rtree_subtree_read -rtree_subtree_read_hard -rtree_subtree_tryread -rtree_val_read -rtree_val_write -run_quantize_ceil -run_quantize_floor -s2u -s2u_compute -s2u_lookup -sa2u -set_errno -size2index -size2index_compute -size2index_lookup -size2index_tab -spin_adaptive -spin_init -stats_cactive -stats_cactive_add -stats_cactive_get -stats_cactive_sub -stats_print -tcache_alloc_easy -tcache_alloc_large -tcache_alloc_small -tcache_alloc_small_hard -tcache_arena_reassociate -tcache_bin_flush_large -tcache_bin_flush_small -tcache_bin_info -tcache_boot -tcache_cleanup -tcache_create -tcache_dalloc_large -tcache_dalloc_small -tcache_enabled_cleanup -tcache_enabled_get -tcache_enabled_set -tcache_event -tcache_event_hard -tcache_flush -tcache_get -tcache_get_hard -tcache_maxclass -tcache_postfork_child -tcache_postfork_parent -tcache_prefork -tcache_salloc -tcache_stats_merge -tcaches -tcaches_create -tcaches_destroy -tcaches_flush -tcaches_get -thread_allocated_cleanup -thread_deallocated_cleanup -ticker_copy -ticker_init -ticker_read -ticker_tick -ticker_ticks -tsd_arena_get -tsd_arena_set -tsd_arenap_get -tsd_arenas_tdata_bypass_get -tsd_arenas_tdata_bypass_set -tsd_arenas_tdata_bypassp_get -tsd_arenas_tdata_get -tsd_arenas_tdata_set -tsd_arenas_tdatap_get -tsd_boot -tsd_boot0 -tsd_boot1 -tsd_booted -tsd_booted_get -tsd_cleanup -tsd_cleanup_wrapper -tsd_fetch -tsd_fetch_impl -tsd_get -tsd_get_allocates -tsd_iarena_get -tsd_iarena_set -tsd_iarenap_get -tsd_initialized -tsd_init_check_recursion -tsd_init_finish -tsd_init_head -tsd_narenas_tdata_get -tsd_narenas_tdata_set -tsd_narenas_tdatap_get -tsd_wrapper_get -tsd_wrapper_set -tsd_nominal -tsd_prof_tdata_get -tsd_prof_tdata_set -tsd_prof_tdatap_get -tsd_quarantine_get -tsd_quarantine_set -tsd_quarantinep_get -tsd_set -tsd_tcache_enabled_get -tsd_tcache_enabled_set -tsd_tcache_enabledp_get -tsd_tcache_get -tsd_tcache_set -tsd_tcachep_get -tsd_thread_allocated_get -tsd_thread_allocated_set -tsd_thread_allocatedp_get -tsd_thread_deallocated_get -tsd_thread_deallocated_set -tsd_thread_deallocatedp_get -tsd_tls -tsd_tsd -tsd_tsdn -tsd_witness_fork_get -tsd_witness_fork_set -tsd_witness_forkp_get -tsd_witnesses_get -tsd_witnesses_set -tsd_witnessesp_get -tsdn_fetch -tsdn_null -tsdn_tsd -u2rz -valgrind_freelike_block -valgrind_make_mem_defined -valgrind_make_mem_noaccess -valgrind_make_mem_undefined -witness_assert_depth -witness_assert_depth_to_rank -witness_assert_lockless -witness_assert_not_owner -witness_assert_owner -witness_depth_error -witness_fork_cleanup -witness_init -witness_lock -witness_lock_error -witness_not_owner_error -witness_owner -witness_owner_error -witness_postfork_child -witness_postfork_parent -witness_prefork -witness_unlock -witnesses_cleanup -zone_register diff --git a/include/jemalloc/internal/private_unnamespace.sh b/include/jemalloc/internal/private_unnamespace.sh deleted file mode 100755 index 23fed8e..0000000 --- a/include/jemalloc/internal/private_unnamespace.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -for symbol in `cat $1` ; do - echo "#undef ${symbol}" -done diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index c2bda19..15cc2d1 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -1,5 +1,8 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_PRNG_H +#define JEMALLOC_INTERNAL_PRNG_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bit_util.h" /* * Simple linear congruential pseudo-random number generator: @@ -20,95 +23,71 @@ * bits. */ -#define PRNG_A_32 UINT32_C(1103515241) -#define PRNG_C_32 UINT32_C(12347) - -#define PRNG_A_64 UINT64_C(6364136223846793005) -#define PRNG_C_64 UINT64_C(1442695040888963407) - -#endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ +/* INTERNAL DEFINITIONS -- IGNORE */ /******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +#define PRNG_A_32 UINT32_C(1103515241) +#define PRNG_C_32 UINT32_C(12347) -#ifndef JEMALLOC_ENABLE_INLINE -uint32_t prng_state_next_u32(uint32_t state); -uint64_t prng_state_next_u64(uint64_t state); -size_t prng_state_next_zu(size_t state); +#define PRNG_A_64 UINT64_C(6364136223846793005) +#define PRNG_C_64 UINT64_C(1442695040888963407) -uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range, - bool atomic); -uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range); -size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic); - -uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic); -uint64_t prng_range_u64(uint64_t *state, uint64_t range); -size_t prng_range_zu(size_t *state, size_t range, bool atomic); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) JEMALLOC_ALWAYS_INLINE uint32_t -prng_state_next_u32(uint32_t state) -{ - - return ((state * PRNG_A_32) + PRNG_C_32); +prng_state_next_u32(uint32_t state) { + return (state * PRNG_A_32) + PRNG_C_32; } JEMALLOC_ALWAYS_INLINE uint64_t -prng_state_next_u64(uint64_t state) -{ - - return ((state * PRNG_A_64) + PRNG_C_64); +prng_state_next_u64(uint64_t state) { + return (state * PRNG_A_64) + PRNG_C_64; } JEMALLOC_ALWAYS_INLINE size_t -prng_state_next_zu(size_t state) -{ - +prng_state_next_zu(size_t state) { #if LG_SIZEOF_PTR == 2 - return ((state * PRNG_A_32) + PRNG_C_32); + return (state * PRNG_A_32) + PRNG_C_32; #elif LG_SIZEOF_PTR == 3 - return ((state * PRNG_A_64) + PRNG_C_64); + return (state * PRNG_A_64) + PRNG_C_64; #else #error Unsupported pointer size #endif } +/******************************************************************************/ +/* BEGIN PUBLIC API */ +/******************************************************************************/ + +/* + * The prng_lg_range functions give a uniform int in the half-open range [0, + * 2**lg_range). If atomic is true, they do so safely from multiple threads. + * Multithreaded 64-bit prngs aren't supported. + */ + JEMALLOC_ALWAYS_INLINE uint32_t -prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) -{ - uint32_t ret, state1; +prng_lg_range_u32(atomic_u32_t *state, unsigned lg_range, bool atomic) { + uint32_t ret, state0, state1; assert(lg_range > 0); assert(lg_range <= 32); - if (atomic) { - uint32_t state0; + state0 = atomic_load_u32(state, ATOMIC_RELAXED); + if (atomic) { do { - state0 = atomic_read_uint32(state); state1 = prng_state_next_u32(state0); - } while (atomic_cas_uint32(state, state0, state1)); + } while (!atomic_compare_exchange_weak_u32(state, &state0, + state1, ATOMIC_RELAXED, ATOMIC_RELAXED)); } else { - state1 = prng_state_next_u32(*state); - *state = state1; + state1 = prng_state_next_u32(state0); + atomic_store_u32(state, state1, ATOMIC_RELAXED); } ret = state1 >> (32 - lg_range); - return (ret); + return ret; } -/* 64-bit atomic operations cannot be supported on all relevant platforms. */ JEMALLOC_ALWAYS_INLINE uint64_t -prng_lg_range_u64(uint64_t *state, unsigned lg_range) -{ +prng_lg_range_u64(uint64_t *state, unsigned lg_range) { uint64_t ret, state1; assert(lg_range > 0); @@ -118,36 +97,39 @@ prng_lg_range_u64(uint64_t *state, unsigned lg_range) *state = state1; ret = state1 >> (64 - lg_range); - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE size_t -prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) -{ - size_t ret, state1; +prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic) { + size_t ret, state0, state1; assert(lg_range > 0); assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR)); - if (atomic) { - size_t state0; + state0 = atomic_load_zu(state, ATOMIC_RELAXED); + if (atomic) { do { - state0 = atomic_read_z(state); state1 = prng_state_next_zu(state0); - } while (atomic_cas_z(state, state0, state1)); + } while (atomic_compare_exchange_weak_zu(state, &state0, + state1, ATOMIC_RELAXED, ATOMIC_RELAXED)); } else { - state1 = prng_state_next_zu(*state); - *state = state1; + state1 = prng_state_next_zu(state0); + atomic_store_zu(state, state1, ATOMIC_RELAXED); } ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); - return (ret); + return ret; } +/* + * The prng_range functions behave like the prng_lg_range, but return a result + * in [0, range) instead of [0, 2**lg_range). + */ + JEMALLOC_ALWAYS_INLINE uint32_t -prng_range_u32(uint32_t *state, uint32_t range, bool atomic) -{ +prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) { uint32_t ret; unsigned lg_range; @@ -161,12 +143,11 @@ prng_range_u32(uint32_t *state, uint32_t range, bool atomic) ret = prng_lg_range_u32(state, lg_range, atomic); } while (ret >= range); - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE uint64_t -prng_range_u64(uint64_t *state, uint64_t range) -{ +prng_range_u64(uint64_t *state, uint64_t range) { uint64_t ret; unsigned lg_range; @@ -180,12 +161,11 @@ prng_range_u64(uint64_t *state, uint64_t range) ret = prng_lg_range_u64(state, lg_range); } while (ret >= range); - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE size_t -prng_range_zu(size_t *state, size_t range, bool atomic) -{ +prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) { size_t ret; unsigned lg_range; @@ -199,9 +179,7 @@ prng_range_zu(size_t *state, size_t range, bool atomic) ret = prng_lg_range_zu(state, lg_range, atomic); } while (ret >= range); - return (ret); + return ret; } -#endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_PRNG_H */ diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h deleted file mode 100644 index 8293b71..0000000 --- a/include/jemalloc/internal/prof.h +++ /dev/null @@ -1,547 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct prof_bt_s prof_bt_t; -typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_tctx_s prof_tctx_t; -typedef struct prof_gctx_s prof_gctx_t; -typedef struct prof_tdata_s prof_tdata_t; - -/* Option defaults. */ -#ifdef JEMALLOC_PROF -# define PROF_PREFIX_DEFAULT "jeprof" -#else -# define PROF_PREFIX_DEFAULT "" -#endif -#define LG_PROF_SAMPLE_DEFAULT 19 -#define LG_PROF_INTERVAL_DEFAULT -1 - -/* - * Hard limit on stack backtrace depth. The version of prof_backtrace() that - * is based on __builtin_return_address() necessarily has a hard-coded number - * of backtrace frame handlers, and should be kept in sync with this setting. - */ -#define PROF_BT_MAX 128 - -/* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 - -/* Size of memory buffer to use when writing dump files. */ -#define PROF_DUMP_BUFSIZE 65536 - -/* Size of stack-allocated buffer used by prof_printf(). */ -#define PROF_PRINTF_BUFSIZE 128 - -/* - * Number of mutexes shared among all gctx's. No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NCTX_LOCKS 1024 - -/* - * Number of mutexes shared among all tdata's. No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NTDATA_LOCKS 256 - -/* - * prof_tdata pointers close to NULL are used to encode state information that - * is used for cleaning up during thread shutdown. - */ -#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) -#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) -#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct prof_bt_s { - /* Backtrace, stored as len program counters. */ - void **vec; - unsigned len; -}; - -#ifdef JEMALLOC_PROF_LIBGCC -/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ -typedef struct { - prof_bt_t *bt; - unsigned max; -} prof_unwind_data_t; -#endif - -struct prof_cnt_s { - /* Profiling counters. */ - uint64_t curobjs; - uint64_t curbytes; - uint64_t accumobjs; - uint64_t accumbytes; -}; - -typedef enum { - prof_tctx_state_initializing, - prof_tctx_state_nominal, - prof_tctx_state_dumping, - prof_tctx_state_purgatory /* Dumper must finish destroying. */ -} prof_tctx_state_t; - -struct prof_tctx_s { - /* Thread data for thread that performed the allocation. */ - prof_tdata_t *tdata; - - /* - * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be - * defunct during teardown. - */ - uint64_t thr_uid; - uint64_t thr_discrim; - - /* Profiling counters, protected by tdata->lock. */ - prof_cnt_t cnts; - - /* Associated global context. */ - prof_gctx_t *gctx; - - /* - * UID that distinguishes multiple tctx's created by the same thread, - * but coexisting in gctx->tctxs. There are two ways that such - * coexistence can occur: - * - A dumper thread can cause a tctx to be retained in the purgatory - * state. - * - Although a single "producer" thread must create all tctx's which - * share the same thr_uid, multiple "consumers" can each concurrently - * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only - * gets called once each time cnts.cur{objs,bytes} drop to 0, but this - * threshold can be hit again before the first consumer finishes - * executing prof_tctx_destroy(). - */ - uint64_t tctx_uid; - - /* Linkage into gctx's tctxs. */ - rb_node(prof_tctx_t) tctx_link; - - /* - * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents - * sample vs destroy race. - */ - bool prepared; - - /* Current dump-related state, protected by gctx->lock. */ - prof_tctx_state_t state; - - /* - * Copy of cnts snapshotted during early dump phase, protected by - * dump_mtx. - */ - prof_cnt_t dump_cnts; -}; -typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; - -struct prof_gctx_s { - /* Protects nlimbo, cnt_summed, and tctxs. */ - malloc_mutex_t *lock; - - /* - * Number of threads that currently cause this gctx to be in a state of - * limbo due to one of: - * - Initializing this gctx. - * - Initializing per thread counters associated with this gctx. - * - Preparing to destroy this gctx. - * - Dumping a heap profile that includes this gctx. - * nlimbo must be 1 (single destroyer) in order to safely destroy the - * gctx. - */ - unsigned nlimbo; - - /* - * Tree of profile counters, one for each thread that has allocated in - * this context. - */ - prof_tctx_tree_t tctxs; - - /* Linkage for tree of contexts to be dumped. */ - rb_node(prof_gctx_t) dump_link; - - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* Associated backtrace. */ - prof_bt_t bt; - - /* Backtrace vector, variable size, referred to by bt. */ - void *vec[1]; -}; -typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; - -struct prof_tdata_s { - malloc_mutex_t *lock; - - /* Monotonically increasing unique thread identifier. */ - uint64_t thr_uid; - - /* - * Monotonically increasing discriminator among tdata structures - * associated with the same thr_uid. - */ - uint64_t thr_discrim; - - /* Included in heap profile dumps if non-NULL. */ - char *thread_name; - - bool attached; - bool expired; - - rb_node(prof_tdata_t) tdata_link; - - /* - * Counter used to initialize prof_tctx_t's tctx_uid. No locking is - * necessary when incrementing this field, because only one thread ever - * does so. - */ - uint64_t tctx_uid_next; - - /* - * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks - * backtraces for which it has non-zero allocation/deallocation counters - * associated with thread-specific prof_tctx_t objects. Other threads - * may write to prof_tctx_t contents when freeing associated objects. - */ - ckh_t bt2tctx; - - /* Sampling state. */ - uint64_t prng_state; - uint64_t bytes_until_sample; - - /* State used to avoid dumping while operating on prof internals. */ - bool enq; - bool enq_idump; - bool enq_gdump; - - /* - * Set to true during an early dump phase for tdata's which are - * currently being dumped. New threads' tdata's have this initialized - * to false so that they aren't accidentally included in later dump - * phases. - */ - bool dumping; - - /* - * True if profiling is active for this tdata's thread - * (thread.prof.active mallctl). - */ - bool active; - - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* Backtrace vector, used for calls to prof_backtrace(). */ - void *vec[PROF_BT_MAX]; -}; -typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_prof; -extern bool opt_prof_active; -extern bool opt_prof_thread_active_init; -extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_gdump; /* High-water memory dumping. */ -extern bool opt_prof_final; /* Final profile dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ -extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern char opt_prof_prefix[ - /* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PATH_MAX + -#endif - 1]; - -/* Accessed via prof_active_[gs]et{_unlocked,}(). */ -extern bool prof_active; - -/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ -extern bool prof_gdump_val; - -/* - * Profile dump interval, measured in bytes allocated. Each arena triggers a - * profile dump when it reaches this threshold. The effect is that the - * interval between profile dumps averages prof_interval, though the actual - * interval between dumps will tend to be sporadic, and the interval will be a - * maximum of approximately (prof_interval * narenas). - */ -extern uint64_t prof_interval; - -/* - * Initialized as opt_lg_prof_sample, and potentially modified during profiling - * resets. - */ -extern size_t lg_prof_sample; - -void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx); -void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); -void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt); -prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); -#ifdef JEMALLOC_JET -size_t prof_tdata_count(void); -size_t prof_bt_count(void); -const prof_cnt_t *prof_cnt_all(void); -typedef int (prof_dump_open_t)(bool, const char *); -extern prof_dump_open_t *prof_dump_open; -typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); -extern prof_dump_header_t *prof_dump_header; -#endif -void prof_idump(tsdn_t *tsdn); -bool prof_mdump(tsd_t *tsd, const char *filename); -void prof_gdump(tsdn_t *tsdn); -prof_tdata_t *prof_tdata_init(tsd_t *tsd); -prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); -void prof_reset(tsd_t *tsd, size_t lg_sample); -void prof_tdata_cleanup(tsd_t *tsd); -bool prof_active_get(tsdn_t *tsdn); -bool prof_active_set(tsdn_t *tsdn, bool active); -const char *prof_thread_name_get(tsd_t *tsd); -int prof_thread_name_set(tsd_t *tsd, const char *thread_name); -bool prof_thread_active_get(tsd_t *tsd); -bool prof_thread_active_set(tsd_t *tsd, bool active); -bool prof_thread_active_init_get(tsdn_t *tsdn); -bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); -bool prof_gdump_get(tsdn_t *tsdn); -bool prof_gdump_set(tsdn_t *tsdn, bool active); -void prof_boot0(void); -void prof_boot1(void); -bool prof_boot2(tsd_t *tsd); -void prof_prefork0(tsdn_t *tsdn); -void prof_prefork1(tsdn_t *tsdn); -void prof_postfork_parent(tsdn_t *tsdn); -void prof_postfork_child(tsdn_t *tsdn); -void prof_sample_threshold_update(prof_tdata_t *tdata); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool prof_active_get_unlocked(void); -bool prof_gdump_get_unlocked(void); -prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); -prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const void *ptr); -void prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx); -void prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, - const void *old_ptr, prof_tctx_t *tctx); -bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, - prof_tdata_t **tdata_out); -prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, - bool update); -void prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx); -void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, - size_t old_usize, prof_tctx_t *old_tctx); -void prof_free(tsd_t *tsd, const void *ptr, size_t usize); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) -JEMALLOC_ALWAYS_INLINE bool -prof_active_get_unlocked(void) -{ - - /* - * Even if opt_prof is true, sampling can be temporarily disabled by - * setting prof_active to false. No locking is used when reading - * prof_active in the fast path, so there are no guarantees regarding - * how long it will take for all threads to notice state changes. - */ - return (prof_active); -} - -JEMALLOC_ALWAYS_INLINE bool -prof_gdump_get_unlocked(void) -{ - - /* - * No locking is used when reading prof_gdump_val in the fast path, so - * there are no guarantees regarding how long it will take for all - * threads to notice state changes. - */ - return (prof_gdump_val); -} - -JEMALLOC_ALWAYS_INLINE prof_tdata_t * -prof_tdata_get(tsd_t *tsd, bool create) -{ - prof_tdata_t *tdata; - - cassert(config_prof); - - tdata = tsd_prof_tdata_get(tsd); - if (create) { - if (unlikely(tdata == NULL)) { - if (tsd_nominal(tsd)) { - tdata = prof_tdata_init(tsd); - tsd_prof_tdata_set(tsd, tdata); - } - } else if (unlikely(tdata->expired)) { - tdata = prof_tdata_reinit(tsd, tdata); - tsd_prof_tdata_set(tsd, tdata); - } - assert(tdata == NULL || tdata->attached); - } - - return (tdata); -} - -JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(tsdn_t *tsdn, const void *ptr) -{ - - cassert(config_prof); - assert(ptr != NULL); - - return (arena_prof_tctx_get(tsdn, ptr)); -} - -JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - - arena_prof_tctx_set(tsdn, ptr, usize, tctx); -} - -JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr, - prof_tctx_t *old_tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - - arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx); -} - -JEMALLOC_ALWAYS_INLINE bool -prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out) -{ - prof_tdata_t *tdata; - - cassert(config_prof); - - tdata = prof_tdata_get(tsd, true); - if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) - tdata = NULL; - - if (tdata_out != NULL) - *tdata_out = tdata; - - if (unlikely(tdata == NULL)) - return (true); - - if (likely(tdata->bytes_until_sample >= usize)) { - if (update) - tdata->bytes_until_sample -= usize; - return (true); - } else { - /* Compute new sample threshold. */ - if (update) - prof_sample_threshold_update(tdata); - return (!tdata->active); - } -} - -JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) -{ - prof_tctx_t *ret; - prof_tdata_t *tdata; - prof_bt_t bt; - - assert(usize == s2u(usize)); - - if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, - &tdata))) - ret = (prof_tctx_t *)(uintptr_t)1U; - else { - bt_init(&bt, tdata->vec); - prof_backtrace(&bt); - ret = prof_lookup(tsd, &bt); - } - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void -prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - assert(usize == isalloc(tsdn, ptr, true)); - - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_malloc_sample_object(tsdn, ptr, usize, tctx); - else - prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); -} - -JEMALLOC_ALWAYS_INLINE void -prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, - bool prof_active, bool updated, const void *old_ptr, size_t old_usize, - prof_tctx_t *old_tctx) -{ - bool sampled, old_sampled; - - cassert(config_prof); - assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); - - if (prof_active && !updated && ptr != NULL) { - assert(usize == isalloc(tsd_tsdn(tsd), ptr, true)); - if (prof_sample_accum_update(tsd, usize, true, NULL)) { - /* - * Don't sample. The usize passed to prof_alloc_prep() - * was larger than what actually got allocated, so a - * backtrace was captured for this allocation, even - * though its actual usize was insufficient to cross the - * sample threshold. - */ - prof_alloc_rollback(tsd, tctx, true); - tctx = (prof_tctx_t *)(uintptr_t)1U; - } - } - - sampled = ((uintptr_t)tctx > (uintptr_t)1U); - old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); - - if (unlikely(sampled)) - prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx); - else - prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx); - - if (unlikely(old_sampled)) - prof_free_sampled_object(tsd, old_usize, old_tctx); -} - -JEMALLOC_ALWAYS_INLINE void -prof_free(tsd_t *tsd, const void *ptr, size_t usize) -{ - prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); - - cassert(config_prof); - assert(usize == isalloc(tsd_tsdn(tsd), ptr, true)); - - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_free_sampled_object(tsd, usize, tctx); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h new file mode 100644 index 0000000..0434869 --- /dev/null +++ b/include/jemalloc/internal/prof_externs.h @@ -0,0 +1,92 @@ +#ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H +#define JEMALLOC_INTERNAL_PROF_EXTERNS_H + +#include "jemalloc/internal/mutex.h" + +extern malloc_mutex_t bt2gctx_mtx; + +extern bool opt_prof; +extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. */ +extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern char opt_prof_prefix[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; + +/* Accessed via prof_active_[gs]et{_unlocked,}(). */ +extern bool prof_active; + +/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ +extern bool prof_gdump_val; + +/* + * Profile dump interval, measured in bytes allocated. Each arena triggers a + * profile dump when it reaches this threshold. The effect is that the + * interval between profile dumps averages prof_interval, though the actual + * interval between dumps will tend to be sporadic, and the interval will be a + * maximum of approximately (prof_interval * narenas). + */ +extern uint64_t prof_interval; + +/* + * Initialized as opt_lg_prof_sample, and potentially modified during profiling + * resets. + */ +extern size_t lg_prof_sample; + +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); +void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(prof_bt_t *bt); +prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); +#ifdef JEMALLOC_JET +size_t prof_tdata_count(void); +size_t prof_bt_count(void); +#endif +typedef int (prof_dump_open_t)(bool, const char *); +extern prof_dump_open_t *JET_MUTABLE prof_dump_open; + +typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); +extern prof_dump_header_t *JET_MUTABLE prof_dump_header; +#ifdef JEMALLOC_JET +void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, + uint64_t *accumbytes); +#endif +bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); +void prof_idump(tsdn_t *tsdn); +bool prof_mdump(tsd_t *tsd, const char *filename); +void prof_gdump(tsdn_t *tsdn); +prof_tdata_t *prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); +void prof_reset(tsd_t *tsd, size_t lg_sample); +void prof_tdata_cleanup(tsd_t *tsd); +bool prof_active_get(tsdn_t *tsdn); +bool prof_active_set(tsdn_t *tsdn, bool active); +const char *prof_thread_name_get(tsd_t *tsd); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_thread_active_get(tsd_t *tsd); +bool prof_thread_active_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsdn_t *tsdn); +bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); +bool prof_gdump_get(tsdn_t *tsdn); +bool prof_gdump_set(tsdn_t *tsdn, bool active); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(tsd_t *tsd); +void prof_prefork0(tsdn_t *tsdn); +void prof_prefork1(tsdn_t *tsdn); +void prof_postfork_parent(tsdn_t *tsdn); +void prof_postfork_child(tsdn_t *tsdn); +void prof_sample_threshold_update(prof_tdata_t *tdata); + +#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h new file mode 100644 index 0000000..eda6839 --- /dev/null +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -0,0 +1,72 @@ +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H +#define JEMALLOC_INTERNAL_PROF_INLINES_A_H + +#include "jemalloc/internal/mutex.h" + +static inline bool +prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) { + cassert(config_prof); + + bool overflow; + uint64_t a0, a1; + + /* + * If the application allocates fast enough (and/or if idump is slow + * enough), extreme overflow here (a1 >= prof_interval * 2) can cause + * idump trigger coalescing. This is an intentional mechanism that + * avoids rate-limiting allocation. + */ +#ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); + do { + a1 = a0 + accumbytes; + assert(a1 >= a0); + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, + a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); +#else + malloc_mutex_lock(tsdn, &prof_accum->mtx); + a0 = prof_accum->accumbytes; + a1 = a0 + accumbytes; + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + prof_accum->accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_accum->mtx); +#endif + return overflow; +} + +static inline void +prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) { + cassert(config_prof); + + /* + * Cancel out as much of the excessive prof_accumbytes increase as + * possible without underflowing. Interval-triggered dumps occur + * slightly more often than intended as a result of incomplete + * canceling. + */ + uint64_t a0, a1; +#ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); + do { + a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - + usize) : 0; + } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, + a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); +#else + malloc_mutex_lock(tsdn, &prof_accum->mtx); + a0 = prof_accum->accumbytes; + a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - usize) : + 0; + prof_accum->accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_accum->mtx); +#endif +} + +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h new file mode 100644 index 0000000..d670cb7 --- /dev/null +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -0,0 +1,217 @@ +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H +#define JEMALLOC_INTERNAL_PROF_INLINES_B_H + +#include "jemalloc/internal/sz.h" + +JEMALLOC_ALWAYS_INLINE bool +prof_active_get_unlocked(void) { + /* + * Even if opt_prof is true, sampling can be temporarily disabled by + * setting prof_active to false. No locking is used when reading + * prof_active in the fast path, so there are no guarantees regarding + * how long it will take for all threads to notice state changes. + */ + return prof_active; +} + +JEMALLOC_ALWAYS_INLINE bool +prof_gdump_get_unlocked(void) { + /* + * No locking is used when reading prof_gdump_val in the fast path, so + * there are no guarantees regarding how long it will take for all + * threads to notice state changes. + */ + return prof_gdump_val; +} + +JEMALLOC_ALWAYS_INLINE prof_tdata_t * +prof_tdata_get(tsd_t *tsd, bool create) { + prof_tdata_t *tdata; + + cassert(config_prof); + + tdata = tsd_prof_tdata_get(tsd); + if (create) { + if (unlikely(tdata == NULL)) { + if (tsd_nominal(tsd)) { + tdata = prof_tdata_init(tsd); + tsd_prof_tdata_set(tsd, tdata); + } + } else if (unlikely(tdata->expired)) { + tdata = prof_tdata_reinit(tsd, tdata); + tsd_prof_tdata_set(tsd, tdata); + } + assert(tdata == NULL || tdata->attached); + } + + return tdata; +} + +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { + cassert(config_prof); + assert(ptr != NULL); + + return arena_prof_tctx_get(tsdn, ptr, alloc_ctx); +} + +JEMALLOC_ALWAYS_INLINE void +prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, + alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_tctx_set(tsdn, ptr, usize, alloc_ctx, tctx); +} + +JEMALLOC_ALWAYS_INLINE void +prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_tctx_reset(tsdn, ptr, tctx); +} + +JEMALLOC_ALWAYS_INLINE bool +prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, + prof_tdata_t **tdata_out) { + prof_tdata_t *tdata; + + cassert(config_prof); + + tdata = prof_tdata_get(tsd, true); + if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { + tdata = NULL; + } + + if (tdata_out != NULL) { + *tdata_out = tdata; + } + + if (unlikely(tdata == NULL)) { + return true; + } + + if (likely(tdata->bytes_until_sample >= usize)) { + if (update) { + tdata->bytes_until_sample -= usize; + } + return true; + } else { + if (tsd_reentrancy_level_get(tsd) > 0) { + return true; + } + /* Compute new sample threshold. */ + if (update) { + prof_sample_threshold_update(tdata); + } + return !tdata->active; + } +} + +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { + prof_tctx_t *ret; + prof_tdata_t *tdata; + prof_bt_t bt; + + assert(usize == sz_s2u(usize)); + + if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, + &tdata))) { + ret = (prof_tctx_t *)(uintptr_t)1U; + } else { + bt_init(&bt, tdata->vec); + prof_backtrace(&bt); + ret = prof_lookup(tsd, &bt); + } + + return ret; +} + +JEMALLOC_ALWAYS_INLINE void +prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, + prof_tctx_t *tctx) { + cassert(config_prof); + assert(ptr != NULL); + assert(usize == isalloc(tsdn, ptr)); + + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { + prof_malloc_sample_object(tsdn, ptr, usize, tctx); + } else { + prof_tctx_set(tsdn, ptr, usize, alloc_ctx, + (prof_tctx_t *)(uintptr_t)1U); + } +} + +JEMALLOC_ALWAYS_INLINE void +prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, + bool prof_active, bool updated, const void *old_ptr, size_t old_usize, + prof_tctx_t *old_tctx) { + bool sampled, old_sampled, moved; + + cassert(config_prof); + assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); + + if (prof_active && !updated && ptr != NULL) { + assert(usize == isalloc(tsd_tsdn(tsd), ptr)); + if (prof_sample_accum_update(tsd, usize, true, NULL)) { + /* + * Don't sample. The usize passed to prof_alloc_prep() + * was larger than what actually got allocated, so a + * backtrace was captured for this allocation, even + * though its actual usize was insufficient to cross the + * sample threshold. + */ + prof_alloc_rollback(tsd, tctx, true); + tctx = (prof_tctx_t *)(uintptr_t)1U; + } + } + + sampled = ((uintptr_t)tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); + moved = (ptr != old_ptr); + + if (unlikely(sampled)) { + prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx); + } else if (moved) { + prof_tctx_set(tsd_tsdn(tsd), ptr, usize, NULL, + (prof_tctx_t *)(uintptr_t)1U); + } else if (unlikely(old_sampled)) { + /* + * prof_tctx_set() would work for the !moved case as well, but + * prof_tctx_reset() is slightly cheaper, and the proper thing + * to do here in the presence of explicit knowledge re: moved + * state. + */ + prof_tctx_reset(tsd_tsdn(tsd), ptr, tctx); + } else { + assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), ptr, NULL) == + (uintptr_t)1U); + } + + /* + * The prof_free_sampled_object() call must come after the + * prof_malloc_sample_object() call, because tctx and old_tctx may be + * the same, in which case reversing the call order could cause the tctx + * to be prematurely destroyed as a side effect of momentarily zeroed + * counters. + */ + if (unlikely(old_sampled)) { + prof_free_sampled_object(tsd, old_usize, old_tctx); + } +} + +JEMALLOC_ALWAYS_INLINE void +prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { + prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx); + + cassert(config_prof); + assert(usize == isalloc(tsd_tsdn(tsd), ptr)); + + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { + prof_free_sampled_object(tsd, usize, tctx); + } +} + +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h new file mode 100644 index 0000000..0d58ae1 --- /dev/null +++ b/include/jemalloc/internal/prof_structs.h @@ -0,0 +1,201 @@ +#ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H +#define JEMALLOC_INTERNAL_PROF_STRUCTS_H + +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/rb.h" + +struct prof_bt_s { + /* Backtrace, stored as len program counters. */ + void **vec; + unsigned len; +}; + +#ifdef JEMALLOC_PROF_LIBGCC +/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ +typedef struct { + prof_bt_t *bt; + unsigned max; +} prof_unwind_data_t; +#endif + +struct prof_accum_s { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_t mtx; + uint64_t accumbytes; +#else + atomic_u64_t accumbytes; +#endif +}; + +struct prof_cnt_s { + /* Profiling counters. */ + uint64_t curobjs; + uint64_t curbytes; + uint64_t accumobjs; + uint64_t accumbytes; +}; + +typedef enum { + prof_tctx_state_initializing, + prof_tctx_state_nominal, + prof_tctx_state_dumping, + prof_tctx_state_purgatory /* Dumper must finish destroying. */ +} prof_tctx_state_t; + +struct prof_tctx_s { + /* Thread data for thread that performed the allocation. */ + prof_tdata_t *tdata; + + /* + * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be + * defunct during teardown. + */ + uint64_t thr_uid; + uint64_t thr_discrim; + + /* Profiling counters, protected by tdata->lock. */ + prof_cnt_t cnts; + + /* Associated global context. */ + prof_gctx_t *gctx; + + /* + * UID that distinguishes multiple tctx's created by the same thread, + * but coexisting in gctx->tctxs. There are two ways that such + * coexistence can occur: + * - A dumper thread can cause a tctx to be retained in the purgatory + * state. + * - Although a single "producer" thread must create all tctx's which + * share the same thr_uid, multiple "consumers" can each concurrently + * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only + * gets called once each time cnts.cur{objs,bytes} drop to 0, but this + * threshold can be hit again before the first consumer finishes + * executing prof_tctx_destroy(). + */ + uint64_t tctx_uid; + + /* Linkage into gctx's tctxs. */ + rb_node(prof_tctx_t) tctx_link; + + /* + * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents + * sample vs destroy race. + */ + bool prepared; + + /* Current dump-related state, protected by gctx->lock. */ + prof_tctx_state_t state; + + /* + * Copy of cnts snapshotted during early dump phase, protected by + * dump_mtx. + */ + prof_cnt_t dump_cnts; +}; +typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; + +struct prof_gctx_s { + /* Protects nlimbo, cnt_summed, and tctxs. */ + malloc_mutex_t *lock; + + /* + * Number of threads that currently cause this gctx to be in a state of + * limbo due to one of: + * - Initializing this gctx. + * - Initializing per thread counters associated with this gctx. + * - Preparing to destroy this gctx. + * - Dumping a heap profile that includes this gctx. + * nlimbo must be 1 (single destroyer) in order to safely destroy the + * gctx. + */ + unsigned nlimbo; + + /* + * Tree of profile counters, one for each thread that has allocated in + * this context. + */ + prof_tctx_tree_t tctxs; + + /* Linkage for tree of contexts to be dumped. */ + rb_node(prof_gctx_t) dump_link; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Associated backtrace. */ + prof_bt_t bt; + + /* Backtrace vector, variable size, referred to by bt. */ + void *vec[1]; +}; +typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; + +struct prof_tdata_s { + malloc_mutex_t *lock; + + /* Monotonically increasing unique thread identifier. */ + uint64_t thr_uid; + + /* + * Monotonically increasing discriminator among tdata structures + * associated with the same thr_uid. + */ + uint64_t thr_discrim; + + /* Included in heap profile dumps if non-NULL. */ + char *thread_name; + + bool attached; + bool expired; + + rb_node(prof_tdata_t) tdata_link; + + /* + * Counter used to initialize prof_tctx_t's tctx_uid. No locking is + * necessary when incrementing this field, because only one thread ever + * does so. + */ + uint64_t tctx_uid_next; + + /* + * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks + * backtraces for which it has non-zero allocation/deallocation counters + * associated with thread-specific prof_tctx_t objects. Other threads + * may write to prof_tctx_t contents when freeing associated objects. + */ + ckh_t bt2tctx; + + /* Sampling state. */ + uint64_t prng_state; + uint64_t bytes_until_sample; + + /* State used to avoid dumping while operating on prof internals. */ + bool enq; + bool enq_idump; + bool enq_gdump; + + /* + * Set to true during an early dump phase for tdata's which are + * currently being dumped. New threads' tdata's have this initialized + * to false so that they aren't accidentally included in later dump + * phases. + */ + bool dumping; + + /* + * True if profiling is active for this tdata's thread + * (thread.prof.active mallctl). + */ + bool active; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void *vec[PROF_BT_MAX]; +}; +typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; + +#endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */ diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h new file mode 100644 index 0000000..1eff995 --- /dev/null +++ b/include/jemalloc/internal/prof_types.h @@ -0,0 +1,56 @@ +#ifndef JEMALLOC_INTERNAL_PROF_TYPES_H +#define JEMALLOC_INTERNAL_PROF_TYPES_H + +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_accum_s prof_accum_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_gctx_s prof_gctx_t; +typedef struct prof_tdata_s prof_tdata_t; + +/* Option defaults. */ +#ifdef JEMALLOC_PROF +# define PROF_PREFIX_DEFAULT "jeprof" +#else +# define PROF_PREFIX_DEFAULT "" +#endif +#define LG_PROF_SAMPLE_DEFAULT 19 +#define LG_PROF_INTERVAL_DEFAULT -1 + +/* + * Hard limit on stack backtrace depth. The version of prof_backtrace() that + * is based on __builtin_return_address() necessarily has a hard-coded number + * of backtrace frame handlers, and should be kept in sync with this setting. + */ +#define PROF_BT_MAX 128 + +/* Initial hash table size. */ +#define PROF_CKH_MINITEMS 64 + +/* Size of memory buffer to use when writing dump files. */ +#define PROF_DUMP_BUFSIZE 65536 + +/* Size of stack-allocated buffer used by prof_printf(). */ +#define PROF_PRINTF_BUFSIZE 128 + +/* + * Number of mutexes shared among all gctx's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NCTX_LOCKS 1024 + +/* + * Number of mutexes shared among all tdata's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NTDATA_LOCKS 256 + +/* + * prof_tdata pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) +#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) +#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY + +#endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/include/jemalloc/internal/public_namespace.sh b/include/jemalloc/internal/public_namespace.sh index 362109f..4d415ba 100755 --- a/include/jemalloc/internal/public_namespace.sh +++ b/include/jemalloc/internal/public_namespace.sh @@ -2,5 +2,5 @@ for nm in `cat $1` ; do n=`echo ${nm} |tr ':' ' ' |awk '{print $1}'` - echo "#define je_${n} JEMALLOC_N(${n})" + echo "#define je_${n} JEMALLOC_N(${n})" done diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index 1834bb8..8029040 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -1,59 +1,64 @@ +#ifndef JEMALLOC_INTERNAL_QL_H +#define JEMALLOC_INTERNAL_QL_H + +#include "jemalloc/internal/qr.h" + /* List definitions. */ -#define ql_head(a_type) \ +#define ql_head(a_type) \ struct { \ a_type *qlh_first; \ } -#define ql_head_initializer(a_head) {NULL} +#define ql_head_initializer(a_head) {NULL} -#define ql_elm(a_type) qr(a_type) +#define ql_elm(a_type) qr(a_type) /* List functions. */ -#define ql_new(a_head) do { \ +#define ql_new(a_head) do { \ (a_head)->qlh_first = NULL; \ } while (0) -#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) +#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) -#define ql_first(a_head) ((a_head)->qlh_first) +#define ql_first(a_head) ((a_head)->qlh_first) -#define ql_last(a_head, a_field) \ +#define ql_last(a_head, a_field) \ ((ql_first(a_head) != NULL) \ ? qr_prev(ql_first(a_head), a_field) : NULL) -#define ql_next(a_head, a_elm, a_field) \ +#define ql_next(a_head, a_elm, a_field) \ ((ql_last(a_head, a_field) != (a_elm)) \ ? qr_next((a_elm), a_field) : NULL) -#define ql_prev(a_head, a_elm, a_field) \ +#define ql_prev(a_head, a_elm, a_field) \ ((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) \ : NULL) -#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ +#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ qr_before_insert((a_qlelm), (a_elm), a_field); \ if (ql_first(a_head) == (a_qlelm)) { \ ql_first(a_head) = (a_elm); \ } \ } while (0) -#define ql_after_insert(a_qlelm, a_elm, a_field) \ +#define ql_after_insert(a_qlelm, a_elm, a_field) \ qr_after_insert((a_qlelm), (a_elm), a_field) -#define ql_head_insert(a_head, a_elm, a_field) do { \ +#define ql_head_insert(a_head, a_elm, a_field) do { \ if (ql_first(a_head) != NULL) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = (a_elm); \ } while (0) -#define ql_tail_insert(a_head, a_elm, a_field) do { \ +#define ql_tail_insert(a_head, a_elm, a_field) do { \ if (ql_first(a_head) != NULL) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = qr_next((a_elm), a_field); \ } while (0) -#define ql_remove(a_head, a_elm, a_field) do { \ +#define ql_remove(a_head, a_elm, a_field) do { \ if (ql_first(a_head) == (a_elm)) { \ ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ } \ @@ -64,18 +69,20 @@ struct { \ } \ } while (0) -#define ql_head_remove(a_head, a_type, a_field) do { \ +#define ql_head_remove(a_head, a_type, a_field) do { \ a_type *t = ql_first(a_head); \ ql_remove((a_head), t, a_field); \ } while (0) -#define ql_tail_remove(a_head, a_type, a_field) do { \ +#define ql_tail_remove(a_head, a_type, a_field) do { \ a_type *t = ql_last(a_head, a_field); \ ql_remove((a_head), t, a_field); \ } while (0) -#define ql_foreach(a_var, a_head, a_field) \ +#define ql_foreach(a_var, a_head, a_field) \ qr_foreach((a_var), ql_first(a_head), a_field) -#define ql_reverse_foreach(a_var, a_head, a_field) \ +#define ql_reverse_foreach(a_var, a_head, a_field) \ qr_reverse_foreach((a_var), ql_first(a_head), a_field) + +#endif /* JEMALLOC_INTERNAL_QL_H */ diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index 0fbaec2..1e1056b 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -1,38 +1,39 @@ +#ifndef JEMALLOC_INTERNAL_QR_H +#define JEMALLOC_INTERNAL_QR_H + /* Ring definitions. */ -#define qr(a_type) \ +#define qr(a_type) \ struct { \ a_type *qre_next; \ a_type *qre_prev; \ } /* Ring functions. */ -#define qr_new(a_qr, a_field) do { \ +#define qr_new(a_qr, a_field) do { \ (a_qr)->a_field.qre_next = (a_qr); \ (a_qr)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) +#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) -#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) +#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) -#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ +#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \ (a_qr)->a_field.qre_next = (a_qrelm); \ (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \ (a_qrelm)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_after_insert(a_qrelm, a_qr, a_field) \ - do \ - { \ +#define qr_after_insert(a_qrelm, a_qr, a_field) do { \ (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ (a_qr)->a_field.qre_prev = (a_qrelm); \ (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \ (a_qrelm)->a_field.qre_next = (a_qr); \ - } while (0) +} while (0) -#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ - void *t; \ +#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do { \ + a_type *t; \ (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ t = (a_qr_a)->a_field.qre_prev; \ @@ -44,10 +45,10 @@ struct { \ * qr_meld() and qr_split() are functionally equivalent, so there's no need to * have two copies of the code. */ -#define qr_split(a_qr_a, a_qr_b, a_field) \ - qr_meld((a_qr_a), (a_qr_b), a_field) +#define qr_split(a_qr_a, a_qr_b, a_type, a_field) \ + qr_meld((a_qr_a), (a_qr_b), a_type, a_field) -#define qr_remove(a_qr, a_field) do { \ +#define qr_remove(a_qr, a_field) do { \ (a_qr)->a_field.qre_prev->a_field.qre_next \ = (a_qr)->a_field.qre_next; \ (a_qr)->a_field.qre_next->a_field.qre_prev \ @@ -56,14 +57,16 @@ struct { \ (a_qr)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_foreach(var, a_qr, a_field) \ +#define qr_foreach(var, a_qr, a_field) \ for ((var) = (a_qr); \ (var) != NULL; \ (var) = (((var)->a_field.qre_next != (a_qr)) \ ? (var)->a_field.qre_next : NULL)) -#define qr_reverse_foreach(var, a_qr, a_field) \ +#define qr_reverse_foreach(var, a_qr, a_field) \ for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ (var) != NULL; \ (var) = (((var) != (a_qr)) \ ? (var)->a_field.qre_prev : NULL)) + +#endif /* JEMALLOC_INTERNAL_QR_H */ diff --git a/include/jemalloc/internal/quarantine.h b/include/jemalloc/internal/quarantine.h deleted file mode 100644 index ae60739..0000000 --- a/include/jemalloc/internal/quarantine.h +++ /dev/null @@ -1,60 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct quarantine_obj_s quarantine_obj_t; -typedef struct quarantine_s quarantine_t; - -/* Default per thread quarantine size if valgrind is enabled. */ -#define JEMALLOC_VALGRIND_QUARANTINE_DEFAULT (ZU(1) << 24) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct quarantine_obj_s { - void *ptr; - size_t usize; -}; - -struct quarantine_s { - size_t curbytes; - size_t curobjs; - size_t first; -#define LG_MAXOBJS_INIT 10 - size_t lg_maxobjs; - quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. */ -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void quarantine_alloc_hook_work(tsd_t *tsd); -void quarantine(tsd_t *tsd, void *ptr); -void quarantine_cleanup(tsd_t *tsd); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void quarantine_alloc_hook(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_)) -JEMALLOC_ALWAYS_INLINE void -quarantine_alloc_hook(void) -{ - tsd_t *tsd; - - assert(config_fill && opt_quarantine); - - tsd = tsd_fetch(); - if (tsd_quarantine_get(tsd) == NULL) - quarantine_alloc_hook_work(tsd); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 3770342..47fa5ca 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -20,17 +20,21 @@ */ #ifndef RB_H_ -#define RB_H_ +#define RB_H_ + +#ifndef __PGI +#define RB_COMPACT +#endif #ifdef RB_COMPACT /* Node structure. */ -#define rb_node(a_type) \ +#define rb_node(a_type) \ struct { \ a_type *rbn_left; \ a_type *rbn_right_red; \ } #else -#define rb_node(a_type) \ +#define rb_node(a_type) \ struct { \ a_type *rbn_left; \ a_type *rbn_right; \ @@ -39,48 +43,48 @@ struct { \ #endif /* Root structure. */ -#define rb_tree(a_type) \ +#define rb_tree(a_type) \ struct { \ a_type *rbt_root; \ } /* Left accessors. */ -#define rbtn_left_get(a_type, a_field, a_node) \ +#define rbtn_left_get(a_type, a_field, a_node) \ ((a_node)->a_field.rbn_left) -#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \ +#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \ (a_node)->a_field.rbn_left = a_left; \ } while (0) #ifdef RB_COMPACT /* Right accessors. */ -#define rbtn_right_get(a_type, a_field, a_node) \ +#define rbtn_right_get(a_type, a_field, a_node) \ ((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \ & ((ssize_t)-2))) -#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ +#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \ | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \ } while (0) /* Color accessors. */ -#define rbtn_red_get(a_type, a_field, a_node) \ +#define rbtn_red_get(a_type, a_field, a_node) \ ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \ & ((size_t)1))) -#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ +#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ (a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \ | ((ssize_t)a_red)); \ } while (0) -#define rbtn_red_set(a_type, a_field, a_node) do { \ +#define rbtn_red_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \ (a_node)->a_field.rbn_right_red) | ((size_t)1)); \ } while (0) -#define rbtn_black_set(a_type, a_field, a_node) do { \ +#define rbtn_black_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ } while (0) /* Node initializer. */ -#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ /* Bookkeeping bit cannot be used by node pointer. */ \ assert(((uintptr_t)(a_node) & 0x1) == 0); \ rbtn_left_set(a_type, a_field, (a_node), NULL); \ @@ -89,27 +93,27 @@ struct { \ } while (0) #else /* Right accessors. */ -#define rbtn_right_get(a_type, a_field, a_node) \ +#define rbtn_right_get(a_type, a_field, a_node) \ ((a_node)->a_field.rbn_right) -#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ +#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ (a_node)->a_field.rbn_right = a_right; \ } while (0) /* Color accessors. */ -#define rbtn_red_get(a_type, a_field, a_node) \ +#define rbtn_red_get(a_type, a_field, a_node) \ ((a_node)->a_field.rbn_red) -#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ +#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ (a_node)->a_field.rbn_red = (a_red); \ } while (0) -#define rbtn_red_set(a_type, a_field, a_node) do { \ +#define rbtn_red_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_red = true; \ } while (0) -#define rbtn_black_set(a_type, a_field, a_node) do { \ +#define rbtn_black_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_red = false; \ } while (0) /* Node initializer. */ -#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ rbtn_left_set(a_type, a_field, (a_node), NULL); \ rbtn_right_set(a_type, a_field, (a_node), NULL); \ rbtn_red_set(a_type, a_field, (a_node)); \ @@ -117,12 +121,12 @@ struct { \ #endif /* Tree initializer. */ -#define rb_new(a_type, a_field, a_rbt) do { \ +#define rb_new(a_type, a_field, a_rbt) do { \ (a_rbt)->rbt_root = NULL; \ } while (0) /* Internal utility macros. */ -#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ +#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ if ((r_node) != NULL) { \ for (; \ @@ -132,7 +136,7 @@ struct { \ } \ } while (0) -#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ +#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ if ((r_node) != NULL) { \ for (; rbtn_right_get(a_type, a_field, (r_node)) != NULL; \ @@ -141,14 +145,14 @@ struct { \ } \ } while (0) -#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \ +#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \ (r_node) = rbtn_right_get(a_type, a_field, (a_node)); \ rbtn_right_set(a_type, a_field, (a_node), \ rbtn_left_get(a_type, a_field, (r_node))); \ rbtn_left_set(a_type, a_field, (r_node), (a_node)); \ } while (0) -#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \ +#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \ (r_node) = rbtn_left_get(a_type, a_field, (a_node)); \ rbtn_left_set(a_type, a_field, (a_node), \ rbtn_right_get(a_type, a_field, (r_node))); \ @@ -160,7 +164,7 @@ struct { \ * functions generated by an equivalently parameterized call to rb_gen(). */ -#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ +#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ a_attr void \ a_prefix##new(a_rbt_type *rbtree); \ a_attr bool \ @@ -335,7 +339,7 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ * has begun. * arg : Opaque pointer passed to cb(). */ -#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ +#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ a_attr void \ a_prefix##new(a_rbt_type *rbtree) { \ rb_new(a_type, a_field, rbtree); \ @@ -348,13 +352,13 @@ a_attr a_type * \ a_prefix##first(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##last(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ @@ -379,7 +383,7 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ assert(tnode != NULL); \ } \ } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ @@ -404,7 +408,7 @@ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ assert(tnode != NULL); \ } \ } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ @@ -419,7 +423,7 @@ a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ ret = rbtn_right_get(a_type, a_field, ret); \ } \ } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ @@ -438,7 +442,7 @@ a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ break; \ } \ } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ @@ -457,7 +461,7 @@ a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ break; \ } \ } \ - return (ret); \ + return ret; \ } \ a_attr void \ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ @@ -550,8 +554,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* Find node's successor, in preparation for swap. */ \ pathp->cmp = 1; \ nodep = pathp; \ - for (pathp++; pathp->node != NULL; \ - pathp++) { \ + for (pathp++; pathp->node != NULL; pathp++) { \ pathp->cmp = -1; \ pathp[1].node = rbtn_left_get(a_type, a_field, \ pathp->node); \ @@ -873,16 +876,16 @@ a_attr a_type * \ a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ if (node == NULL) { \ - return (NULL); \ + return NULL; \ } else { \ a_type *ret; \ if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ a_field, node), cb, arg)) != NULL || (ret = cb(rbtree, node, \ arg)) != NULL) { \ - return (ret); \ + return ret; \ } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ + return a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg); \ } \ } \ a_attr a_type * \ @@ -894,20 +897,20 @@ a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ if ((ret = a_prefix##iter_start(rbtree, start, \ rbtn_left_get(a_type, a_field, node), cb, arg)) != NULL || \ (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + return ret; \ } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ + return a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg); \ } else if (cmp > 0) { \ - return (a_prefix##iter_start(rbtree, start, \ - rbtn_right_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##iter_start(rbtree, start, \ + rbtn_right_get(a_type, a_field, node), cb, arg); \ } else { \ a_type *ret; \ if ((ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + return ret; \ } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ + return a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg); \ } \ } \ a_attr a_type * \ @@ -920,22 +923,22 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ } else { \ ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ if (node == NULL) { \ - return (NULL); \ + return NULL; \ } else { \ a_type *ret; \ if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + return ret; \ } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg); \ } \ } \ a_attr a_type * \ @@ -948,20 +951,20 @@ a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + return ret; \ } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg); \ } else if (cmp < 0) { \ - return (a_prefix##reverse_iter_start(rbtree, start, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##reverse_iter_start(rbtree, start, \ + rbtn_left_get(a_type, a_field, node), cb, arg); \ } else { \ a_type *ret; \ if ((ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + return ret; \ } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg); \ } \ } \ a_attr a_type * \ @@ -975,7 +978,7 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ cb, arg); \ } \ - return (ret); \ + return ret; \ } \ a_attr void \ a_prefix##destroy_recurse(a_rbt_type *rbtree, a_type *node, void (*cb)( \ diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 8d0c584..b5d4db3 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -1,75 +1,72 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_H +#define JEMALLOC_INTERNAL_RTREE_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree_tsd.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/tsd.h" + /* * This radix tree implementation is tailored to the singular purpose of - * associating metadata with chunks that are currently owned by jemalloc. + * associating metadata with extents that are currently owned by jemalloc. * ******************************************************************************* */ -#ifdef JEMALLOC_H_TYPES - -typedef struct rtree_node_elm_s rtree_node_elm_t; -typedef struct rtree_level_s rtree_level_t; -typedef struct rtree_s rtree_t; - -/* - * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the - * machine address width. - */ -#define LG_RTREE_BITS_PER_LEVEL 4 -#define RTREE_BITS_PER_LEVEL (1U << LG_RTREE_BITS_PER_LEVEL) -/* Maximum rtree height. */ -#define RTREE_HEIGHT_MAX \ - ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) -/* Used for two-stage lock-free node initialization. */ -#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) - -/* - * The node allocation callback function's argument is the number of contiguous - * rtree_node_elm_t structures to allocate, and the resulting memory must be - * zeroed. - */ -typedef rtree_node_elm_t *(rtree_node_alloc_t)(size_t); -typedef void (rtree_node_dalloc_t)(rtree_node_elm_t *); +/* Number of high insignificant bits. */ +#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR+3)) - LG_VADDR) +/* Number of low insigificant bits. */ +#define RTREE_NLIB LG_PAGE +/* Number of significant bits. */ +#define RTREE_NSB (LG_VADDR - RTREE_NLIB) +/* Number of levels in radix tree. */ +#if RTREE_NSB <= 10 +# define RTREE_HEIGHT 1 +#elif RTREE_NSB <= 36 +# define RTREE_HEIGHT 2 +#elif RTREE_NSB <= 52 +# define RTREE_HEIGHT 3 +#else +# error Unsupported number of significant virtual address bits +#endif +/* Use compact leaf representation if virtual address encoding allows. */ +#if RTREE_NHIB >= LG_CEIL_NSIZES +# define RTREE_LEAF_COMPACT +#endif -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS +/* Needed for initialization only. */ +#define RTREE_LEAFKEY_INVALID ((uintptr_t)1) +typedef struct rtree_node_elm_s rtree_node_elm_t; struct rtree_node_elm_s { - union { - void *pun; - rtree_node_elm_t *child; - extent_node_t *val; - }; + atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ }; -struct rtree_level_s { +struct rtree_leaf_elm_s { +#ifdef RTREE_LEAF_COMPACT /* - * A non-NULL subtree points to a subtree rooted along the hypothetical - * path to the leaf node corresponding to key 0. Depending on what keys - * have been used to store to the tree, an arbitrary combination of - * subtree pointers may remain NULL. - * - * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4. - * This results in a 3-level tree, and the leftmost leaf can be directly - * accessed via subtrees[2], the subtree prefixed by 0x0000 (excluding - * 0x00000000) can be accessed via subtrees[1], and the remainder of the - * tree can be accessed via subtrees[0]. - * - * levels[0] : [<unused> | 0x0001******** | 0x0002******** | ...] + * Single pointer-width field containing all three leaf element fields. + * For example, on a 64-bit x64 system with 48 significant virtual + * memory address bits, the index, extent, and slab fields are packed as + * such: * - * levels[1] : [<unused> | 0x00000001**** | 0x00000002**** | ... ] + * x: index + * e: extent + * b: slab * - * levels[2] : [val(0x000000000000) | val(0x000000000001) | ...] - * - * This has practical implications on x64, which currently uses only the - * lower 47 bits of virtual address space in userland, thus leaving - * subtrees[0] unused and avoiding a level of tree traversal. + * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee000b */ - union { - void *subtree_pun; - rtree_node_elm_t *subtree; - }; + atomic_p_t le_bits; +#else + atomic_p_t le_extent; /* (extent_t *) */ + atomic_u_t le_szind; /* (szind_t) */ + atomic_b_t le_slab; /* (bool) */ +#endif +}; + +typedef struct rtree_level_s rtree_level_t; +struct rtree_level_s { /* Number of key bits distinguished by this level. */ unsigned bits; /* @@ -79,288 +76,399 @@ struct rtree_level_s { unsigned cumbits; }; +typedef struct rtree_s rtree_t; struct rtree_s { - rtree_node_alloc_t *alloc; - rtree_node_dalloc_t *dalloc; - unsigned height; - /* - * Precomputed table used to convert from the number of leading 0 key - * bits to which subtree level to start at. - */ - unsigned start_level[RTREE_HEIGHT_MAX]; - rtree_level_t levels[RTREE_HEIGHT_MAX]; + malloc_mutex_t init_lock; + /* Number of elements based on rtree_levels[0].bits. */ +#if RTREE_HEIGHT > 1 + rtree_node_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; +#else + rtree_leaf_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; +#endif }; -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -bool rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, - rtree_node_dalloc_t *dalloc); -void rtree_delete(rtree_t *rtree); -rtree_node_elm_t *rtree_subtree_read_hard(rtree_t *rtree, - unsigned level); -rtree_node_elm_t *rtree_child_read_hard(rtree_t *rtree, - rtree_node_elm_t *elm, unsigned level); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -unsigned rtree_start_level(rtree_t *rtree, uintptr_t key); -uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); - -bool rtree_node_valid(rtree_node_elm_t *node); -rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm, - bool dependent); -rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, - unsigned level, bool dependent); -extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, - bool dependent); -void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, - const extent_node_t *val); -rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level, - bool dependent); -rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level, - bool dependent); - -extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent); -bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val); +/* + * Split the bits into one to three partitions depending on number of + * significant bits. It the number of bits does not divide evenly into the + * number of levels, place one remainder bit per level starting at the leaf + * level. + */ +static const rtree_level_t rtree_levels[] = { +#if RTREE_HEIGHT == 1 + {RTREE_NSB, RTREE_NHIB + RTREE_NSB} +#elif RTREE_HEIGHT == 2 + {RTREE_NSB/2, RTREE_NHIB + RTREE_NSB/2}, + {RTREE_NSB/2 + RTREE_NSB%2, RTREE_NHIB + RTREE_NSB} +#elif RTREE_HEIGHT == 3 + {RTREE_NSB/3, RTREE_NHIB + RTREE_NSB/3}, + {RTREE_NSB/3 + RTREE_NSB%3/2, + RTREE_NHIB + RTREE_NSB/3*2 + RTREE_NSB%3/2}, + {RTREE_NSB/3 + RTREE_NSB%3 - RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB} +#else +# error Unsupported rtree height #endif +}; + +bool rtree_new(rtree_t *rtree, bool zeroed); + +typedef rtree_node_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); +extern rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc; + +typedef rtree_leaf_elm_t *(rtree_leaf_alloc_t)(tsdn_t *, rtree_t *, size_t); +extern rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc; + +typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_node_elm_t *); +extern rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc; -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) -JEMALLOC_ALWAYS_INLINE unsigned -rtree_start_level(rtree_t *rtree, uintptr_t key) -{ - unsigned start_level; +typedef void (rtree_leaf_dalloc_t)(tsdn_t *, rtree_t *, rtree_leaf_elm_t *); +extern rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc; +#ifdef JEMALLOC_JET +void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); +#endif +rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); - if (unlikely(key == 0)) - return (rtree->height - 1); +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_leafkey(uintptr_t key) { + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); + unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - + rtree_levels[RTREE_HEIGHT-1].bits); + unsigned maskbits = ptrbits - cumbits; + uintptr_t mask = ~((ZU(1) << maskbits) - 1); + return (key & mask); +} - start_level = rtree->start_level[lg_floor(key) >> - LG_RTREE_BITS_PER_LEVEL]; - assert(start_level < rtree->height); - return (start_level); +JEMALLOC_ALWAYS_INLINE size_t +rtree_cache_direct_map(uintptr_t key) { + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); + unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - + rtree_levels[RTREE_HEIGHT-1].bits); + unsigned maskbits = ptrbits - cumbits; + return (size_t)((key >> maskbits) & (RTREE_CTX_NCACHE - 1)); } JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) -{ +rtree_subkey(uintptr_t key, unsigned level) { + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); + unsigned cumbits = rtree_levels[level].cumbits; + unsigned shiftbits = ptrbits - cumbits; + unsigned maskbits = rtree_levels[level].bits; + uintptr_t mask = (ZU(1) << maskbits) - 1; + return ((key >> shiftbits) & mask); +} - return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - - rtree->levels[level].cumbits)) & ((ZU(1) << - rtree->levels[level].bits) - 1)); +/* + * Atomic getters. + * + * dependent: Reading a value on behalf of a pointer to a valid allocation + * is guaranteed to be a clean read even without synchronization, + * because the rtree update became visible in memory before the + * pointer came into existence. + * !dependent: An arbitrary read, e.g. on behalf of ivsalloc(), may not be + * dependent on a previous rtree write, which means a stale read + * could result if synchronization were omitted here. + */ +# ifdef RTREE_LEAF_COMPACT +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool dependent) { + return (uintptr_t)atomic_load_p(&elm->le_bits, dependent + ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); +} + +JEMALLOC_ALWAYS_INLINE extent_t * +rtree_leaf_elm_bits_extent_get(uintptr_t bits) { + /* Restore sign-extended high bits, mask slab bit. */ + return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> + RTREE_NHIB) & ~((uintptr_t)0x1)); +} + +JEMALLOC_ALWAYS_INLINE szind_t +rtree_leaf_elm_bits_szind_get(uintptr_t bits) { + return (szind_t)(bits >> LG_VADDR); } JEMALLOC_ALWAYS_INLINE bool -rtree_node_valid(rtree_node_elm_t *node) -{ +rtree_leaf_elm_bits_slab_get(uintptr_t bits) { + return (bool)(bits & (uintptr_t)0x1); +} - return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING); +# endif + +JEMALLOC_ALWAYS_INLINE extent_t * +rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool dependent) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + return rtree_leaf_elm_bits_extent_get(bits); +#else + extent_t *extent = (extent_t *)atomic_load_p(&elm->le_extent, dependent + ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); + return extent; +#endif } -JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * -rtree_child_tryread(rtree_node_elm_t *elm, bool dependent) -{ - rtree_node_elm_t *child; - - /* Double-checked read (first read may be stale. */ - child = elm->child; - if (!dependent && !rtree_node_valid(child)) - child = atomic_read_p(&elm->pun); - assert(!dependent || child != NULL); - return (child); +JEMALLOC_ALWAYS_INLINE szind_t +rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool dependent) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + return rtree_leaf_elm_bits_szind_get(bits); +#else + return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED + : ATOMIC_ACQUIRE); +#endif +} + +JEMALLOC_ALWAYS_INLINE bool +rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool dependent) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + return rtree_leaf_elm_bits_slab_get(bits); +#else + return atomic_load_b(&elm->le_slab, dependent ? ATOMIC_RELAXED : + ATOMIC_ACQUIRE); +#endif } -JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * -rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level, - bool dependent) -{ - rtree_node_elm_t *child; - - child = rtree_child_tryread(elm, dependent); - if (!dependent && unlikely(!rtree_node_valid(child))) - child = rtree_child_read_hard(rtree, elm, level); - assert(!dependent || child != NULL); - return (child); +static inline void +rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + extent_t *extent) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); + uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << + LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) + | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else + atomic_store_p(&elm->le_extent, extent, ATOMIC_RELEASE); +#endif } -JEMALLOC_ALWAYS_INLINE extent_node_t * -rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent) -{ - - if (dependent) { - /* - * Reading a val on behalf of a pointer to a valid allocation is - * guaranteed to be a clean read even without synchronization, - * because the rtree update became visible in memory before the - * pointer came into existence. - */ - return (elm->val); - } else { - /* - * An arbitrary read, e.g. on behalf of ivsalloc(), may not be - * dependent on a previous rtree write, which means a stale read - * could result if synchronization were omitted here. - */ - return (atomic_read_p(&elm->pun)); +static inline void +rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + szind_t szind) { + assert(szind <= NSIZES); + +#ifdef RTREE_LEAF_COMPACT + uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, + true); + uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | + ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & + (((uintptr_t)0x1 << LG_VADDR) - 1)) | + ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else + atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE); +#endif +} + +static inline void +rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool slab) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, + true); + uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << + LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & + (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab); + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else + atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE); +#endif +} + +static inline void +rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + extent_t *extent, szind_t szind, bool slab) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | + ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) | + ((uintptr_t)slab); + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else + rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); + rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); + /* + * Write extent last, since the element is atomically considered valid + * as soon as the extent field is non-NULL. + */ + rtree_leaf_elm_extent_write(tsdn, rtree, elm, extent); +#endif +} + +static inline void +rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, szind_t szind, bool slab) { + assert(!slab || szind < NBINS); + + /* + * The caller implicitly assures that it is the only writer to the szind + * and slab fields, and that the extent field cannot currently change. + */ + rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); + rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); +} + +JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * +rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, bool init_missing) { + assert(key != 0); + assert(!dependent || !init_missing); + + size_t slot = rtree_cache_direct_map(key); + uintptr_t leafkey = rtree_leafkey(key); + assert(leafkey != RTREE_LEAFKEY_INVALID); + + /* Fast path: L1 direct mapped cache. */ + if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { + rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; + assert(leaf != NULL); + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); + return &leaf[subkey]; } + /* + * Search the L2 LRU cache. On hit, swap the matching element into the + * slot in L1 cache, and move the position in L2 up by 1. + */ +#define RTREE_CACHE_CHECK_L2(i) do { \ + if (likely(rtree_ctx->l2_cache[i].leafkey == leafkey)) { \ + rtree_leaf_elm_t *leaf = rtree_ctx->l2_cache[i].leaf; \ + assert(leaf != NULL); \ + if (i > 0) { \ + /* Bubble up by one. */ \ + rtree_ctx->l2_cache[i].leafkey = \ + rtree_ctx->l2_cache[i - 1].leafkey; \ + rtree_ctx->l2_cache[i].leaf = \ + rtree_ctx->l2_cache[i - 1].leaf; \ + rtree_ctx->l2_cache[i - 1].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[i - 1].leaf = \ + rtree_ctx->cache[slot].leaf; \ + } else { \ + rtree_ctx->l2_cache[0].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[0].leaf = \ + rtree_ctx->cache[slot].leaf; \ + } \ + rtree_ctx->cache[slot].leafkey = leafkey; \ + rtree_ctx->cache[slot].leaf = leaf; \ + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); \ + return &leaf[subkey]; \ + } \ +} while (0) + /* Check the first cache entry. */ + RTREE_CACHE_CHECK_L2(0); + /* Search the remaining cache elements. */ + for (unsigned i = 1; i < RTREE_CTX_NCACHE_L2; i++) { + RTREE_CACHE_CHECK_L2(i); + } +#undef RTREE_CACHE_CHECK_L2 + + return rtree_leaf_elm_lookup_hard(tsdn, rtree, rtree_ctx, key, + dependent, init_missing); } -JEMALLOC_INLINE void -rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val) -{ +static inline bool +rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, + extent_t *extent, szind_t szind, bool slab) { + /* Use rtree_clear() to set the extent to NULL. */ + assert(extent != NULL); - atomic_write_p(&elm->pun, val); + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, false, true); + if (elm == NULL) { + return true; + } + + assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) == NULL); + rtree_leaf_elm_write(tsdn, rtree, elm, extent, szind, slab); + + return false; } -JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * -rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) -{ - rtree_node_elm_t *subtree; - - /* Double-checked read (first read may be stale. */ - subtree = rtree->levels[level].subtree; - if (!dependent && unlikely(!rtree_node_valid(subtree))) - subtree = atomic_read_p(&rtree->levels[level].subtree_pun); - assert(!dependent || subtree != NULL); - return (subtree); +JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * +rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, + bool dependent) { + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, dependent, false); + if (!dependent && elm == NULL) { + return NULL; + } + assert(elm != NULL); + return elm; } -JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * -rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent) -{ - rtree_node_elm_t *subtree; +JEMALLOC_ALWAYS_INLINE extent_t * +rtree_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return NULL; + } + return rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent); +} - subtree = rtree_subtree_tryread(rtree, level, dependent); - if (!dependent && unlikely(!rtree_node_valid(subtree))) - subtree = rtree_subtree_read_hard(rtree, level); - assert(!dependent || subtree != NULL); - return (subtree); +JEMALLOC_ALWAYS_INLINE szind_t +rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return NSIZES; + } + return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); } -JEMALLOC_ALWAYS_INLINE extent_node_t * -rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) -{ - uintptr_t subkey; - unsigned start_level; - rtree_node_elm_t *node; - - start_level = rtree_start_level(rtree, key); - - node = rtree_subtree_tryread(rtree, start_level, dependent); -#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) - switch (start_level + RTREE_GET_BIAS) { -#define RTREE_GET_SUBTREE(level) \ - case level: \ - assert(level < (RTREE_HEIGHT_MAX-1)); \ - if (!dependent && unlikely(!rtree_node_valid(node))) \ - return (NULL); \ - subkey = rtree_subkey(rtree, key, level - \ - RTREE_GET_BIAS); \ - node = rtree_child_tryread(&node[subkey], dependent); \ - /* Fall through. */ -#define RTREE_GET_LEAF(level) \ - case level: \ - assert(level == (RTREE_HEIGHT_MAX-1)); \ - if (!dependent && unlikely(!rtree_node_valid(node))) \ - return (NULL); \ - subkey = rtree_subkey(rtree, key, level - \ - RTREE_GET_BIAS); \ - /* \ - * node is a leaf, so it contains values rather than \ - * child pointers. \ - */ \ - return (rtree_val_read(rtree, &node[subkey], \ - dependent)); -#if RTREE_HEIGHT_MAX > 1 - RTREE_GET_SUBTREE(0) -#endif -#if RTREE_HEIGHT_MAX > 2 - RTREE_GET_SUBTREE(1) -#endif -#if RTREE_HEIGHT_MAX > 3 - RTREE_GET_SUBTREE(2) -#endif -#if RTREE_HEIGHT_MAX > 4 - RTREE_GET_SUBTREE(3) -#endif -#if RTREE_HEIGHT_MAX > 5 - RTREE_GET_SUBTREE(4) -#endif -#if RTREE_HEIGHT_MAX > 6 - RTREE_GET_SUBTREE(5) -#endif -#if RTREE_HEIGHT_MAX > 7 - RTREE_GET_SUBTREE(6) -#endif -#if RTREE_HEIGHT_MAX > 8 - RTREE_GET_SUBTREE(7) -#endif -#if RTREE_HEIGHT_MAX > 9 - RTREE_GET_SUBTREE(8) -#endif -#if RTREE_HEIGHT_MAX > 10 - RTREE_GET_SUBTREE(9) -#endif -#if RTREE_HEIGHT_MAX > 11 - RTREE_GET_SUBTREE(10) -#endif -#if RTREE_HEIGHT_MAX > 12 - RTREE_GET_SUBTREE(11) -#endif -#if RTREE_HEIGHT_MAX > 13 - RTREE_GET_SUBTREE(12) -#endif -#if RTREE_HEIGHT_MAX > 14 - RTREE_GET_SUBTREE(13) -#endif -#if RTREE_HEIGHT_MAX > 15 - RTREE_GET_SUBTREE(14) -#endif -#if RTREE_HEIGHT_MAX > 16 -# error Unsupported RTREE_HEIGHT_MAX -#endif - RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1) -#undef RTREE_GET_SUBTREE -#undef RTREE_GET_LEAF - default: not_reached(); +/* + * rtree_slab_read() is intentionally omitted because slab is always read in + * conjunction with szind, which makes rtree_szind_slab_read() a better choice. + */ + +JEMALLOC_ALWAYS_INLINE bool +rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, extent_t **r_extent, szind_t *r_szind) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return true; } -#undef RTREE_GET_BIAS - not_reached(); + *r_extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent); + *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); + return false; } -JEMALLOC_INLINE bool -rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) -{ - uintptr_t subkey; - unsigned i, start_level; - rtree_node_elm_t *node, *child; - - start_level = rtree_start_level(rtree, key); - - node = rtree_subtree_read(rtree, start_level, false); - if (node == NULL) - return (true); - for (i = start_level; /**/; i++, node = child) { - subkey = rtree_subkey(rtree, key, i); - if (i == rtree->height - 1) { - /* - * node is a leaf, so it contains values rather than - * child pointers. - */ - rtree_val_write(rtree, &node[subkey], val); - return (false); - } - assert(i + 1 < rtree->height); - child = rtree_child_read(rtree, &node[subkey], i, false); - if (child == NULL) - return (true); +JEMALLOC_ALWAYS_INLINE bool +rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return true; } - not_reached(); + *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); + *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); + return false; +} + +static inline void +rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, szind_t szind, bool slab) { + assert(!slab || szind < NBINS); + + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); + rtree_leaf_elm_szind_slab_update(tsdn, rtree, elm, szind, slab); +} + +static inline void +rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); + assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) != + NULL); + rtree_leaf_elm_write(tsdn, rtree, elm, NULL, NSIZES, false); } -#endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_RTREE_H */ diff --git a/include/jemalloc/internal/rtree_tsd.h b/include/jemalloc/internal/rtree_tsd.h new file mode 100644 index 0000000..3cdc862 --- /dev/null +++ b/include/jemalloc/internal/rtree_tsd.h @@ -0,0 +1,50 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_CTX_H +#define JEMALLOC_INTERNAL_RTREE_CTX_H + +/* + * Number of leafkey/leaf pairs to cache in L1 and L2 level respectively. Each + * entry supports an entire leaf, so the cache hit rate is typically high even + * with a small number of entries. In rare cases extent activity will straddle + * the boundary between two leaf nodes. Furthermore, an arena may use a + * combination of dss and mmap. Note that as memory usage grows past the amount + * that this cache can directly cover, the cache will become less effective if + * locality of reference is low, but the consequence is merely cache misses + * while traversing the tree nodes. + * + * The L1 direct mapped cache offers consistent and low cost on cache hit. + * However collision could affect hit rate negatively. This is resolved by + * combining with a L2 LRU cache, which requires linear search and re-ordering + * on access but suffers no collision. Note that, the cache will itself suffer + * cache misses if made overly large, plus the cost of linear search in the LRU + * cache. + */ +#define RTREE_CTX_LG_NCACHE 4 +#define RTREE_CTX_NCACHE (1 << RTREE_CTX_LG_NCACHE) +#define RTREE_CTX_NCACHE_L2 8 + +/* + * Zero initializer required for tsd initialization only. Proper initialization + * done via rtree_ctx_data_init(). + */ +#define RTREE_CTX_ZERO_INITIALIZER {{{0}}} + + +typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; + +typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; +struct rtree_ctx_cache_elm_s { + uintptr_t leafkey; + rtree_leaf_elm_t *leaf; +}; + +typedef struct rtree_ctx_s rtree_ctx_t; +struct rtree_ctx_s { + /* Direct mapped cache. */ + rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; + /* L2 LRU cache. */ + rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; +}; + +void rtree_ctx_data_init(rtree_ctx_t *ctx); + +#endif /* JEMALLOC_INTERNAL_RTREE_CTX_H */ diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index f6fbce4..998994d 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -40,6 +40,54 @@ lg() { done } +lg_ceil() { + y=$1 + lg ${y}; lg_floor=${lg_result} + pow2 ${lg_floor}; pow2_floor=${pow2_result} + if [ ${pow2_floor} -lt ${y} ] ; then + lg_ceil_result=$((${lg_floor} + 1)) + else + lg_ceil_result=${lg_floor} + fi +} + +reg_size_compute() { + lg_grp=$1 + lg_delta=$2 + ndelta=$3 + + pow2 ${lg_grp}; grp=${pow2_result} + pow2 ${lg_delta}; delta=${pow2_result} + reg_size=$((${grp} + ${delta}*${ndelta})) +} + +slab_size() { + lg_p=$1 + lg_grp=$2 + lg_delta=$3 + ndelta=$4 + + pow2 ${lg_p}; p=${pow2_result} + reg_size_compute ${lg_grp} ${lg_delta} ${ndelta} + + # Compute smallest slab size that is an integer multiple of reg_size. + try_slab_size=${p} + try_nregs=$((${try_slab_size} / ${reg_size})) + perfect=0 + while [ ${perfect} -eq 0 ] ; do + perfect_slab_size=${try_slab_size} + perfect_nregs=${try_nregs} + + try_slab_size=$((${try_slab_size} + ${p})) + try_nregs=$((${try_slab_size} / ${reg_size})) + if [ ${perfect_slab_size} -eq $((${perfect_nregs} * ${reg_size})) ] ; then + perfect=1 + fi + done + + slab_size_pgs=$((${perfect_slab_size} / ${p})) +} + size_class() { index=$1 lg_grp=$2 @@ -80,8 +128,10 @@ size_class() { if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then bin="yes" + slab_size ${lg_p} ${lg_grp} ${lg_delta} ${ndelta}; pgs=${slab_size_pgs} else bin="no" + pgs=0 fi if [ ${lg_size} -lt ${lg_kmax} \ -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then @@ -89,10 +139,11 @@ size_class() { else lg_delta_lookup="no" fi - printf ' SC(%3d, %6d, %8d, %6d, %3s, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${lg_delta_lookup} + printf ' SC(%3d, %6d, %8d, %6d, %3s, %3s, %3d, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${pgs} ${lg_delta_lookup} # Defined upon return: # - psz ("yes" or "no") # - bin ("yes" or "no") + # - pgs # - lg_delta_lookup (${lg_delta} or "no") } @@ -110,8 +161,8 @@ size_classes() { pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result} pow2 ${lg_g}; g=${pow2_result} - echo "#define SIZE_CLASSES \\" - echo " /* index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup */ \\" + echo "#define SIZE_CLASSES \\" + echo " /* index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup */ \\" ntbins=0 nlbins=0 @@ -197,7 +248,7 @@ size_classes() { fi fi # Final written value is correct: - huge_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + large_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) done @@ -206,53 +257,61 @@ size_classes() { done echo nsizes=${index} + lg_ceil ${nsizes}; lg_ceil_nsizes=${lg_ceil_result} # Defined upon completion: # - ntbins # - nlbins # - nbins # - nsizes + # - lg_ceil_nsizes # - npsizes # - lg_tiny_maxclass # - lookup_maxclass # - small_maxclass # - lg_large_minclass - # - huge_maxclass + # - large_maxclass } cat <<EOF +#ifndef JEMALLOC_INTERNAL_SIZE_CLASSES_H +#define JEMALLOC_INTERNAL_SIZE_CLASSES_H + /* This file was automatically generated by size_classes.sh. */ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES + +#include "jemalloc/internal/jemalloc_internal_types.h" /* - * This header requires LG_SIZEOF_PTR, LG_TINY_MIN, LG_QUANTUM, and LG_PAGE to - * be defined prior to inclusion, and it in turn defines: + * This header file defines: * * LG_SIZE_CLASS_GROUP: Lg of size class count for each size doubling. + * LG_TINY_MIN: Lg of minimum size class to support. * SIZE_CLASSES: Complete table of SC(index, lg_grp, lg_delta, ndelta, psz, - * bin, lg_delta_lookup) tuples. + * bin, pgs, lg_delta_lookup) tuples. * index: Size class index. * lg_grp: Lg group base size (no deltas added). * lg_delta: Lg delta to previous size class. * ndelta: Delta multiplier. size == 1<<lg_grp + ndelta<<lg_delta * psz: 'yes' if a multiple of the page size, 'no' otherwise. * bin: 'yes' if a small bin size class, 'no' otherwise. + * pgs: Slab page count if a small bin size class, 0 otherwise. * lg_delta_lookup: Same as lg_delta if a lookup table size class, 'no' * otherwise. * NTBINS: Number of tiny bins. * NLBINS: Number of bins supported by the lookup table. * NBINS: Number of small size class bins. * NSIZES: Number of size classes. + * LG_CEIL_NSIZES: Number of bits required to store NSIZES. * NPSIZES: Number of size classes that are a multiple of (1U << LG_PAGE). * LG_TINY_MAXCLASS: Lg of maximum tiny size class. * LOOKUP_MAXCLASS: Maximum size class included in lookup table. * SMALL_MAXCLASS: Maximum small size class. * LG_LARGE_MINCLASS: Lg of minimum large size class. - * HUGE_MAXCLASS: Maximum (huge) size class. + * LARGE_MAXCLASS: Maximum (large) size class. */ -#define LG_SIZE_CLASS_GROUP ${lg_g} +#define LG_SIZE_CLASS_GROUP ${lg_g} +#define LG_TINY_MIN ${lg_tmin} EOF @@ -264,17 +323,19 @@ for lg_z in ${lg_zarr} ; do for lg_p in ${lg_parr} ; do echo "#if (LG_SIZEOF_PTR == ${lg_z} && LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})" size_classes ${lg_z} ${lg_q} ${lg_t} ${lg_p} ${lg_g} - echo "#define SIZE_CLASSES_DEFINED" - echo "#define NTBINS ${ntbins}" - echo "#define NLBINS ${nlbins}" - echo "#define NBINS ${nbins}" - echo "#define NSIZES ${nsizes}" - echo "#define NPSIZES ${npsizes}" - echo "#define LG_TINY_MAXCLASS ${lg_tiny_maxclass}" - echo "#define LOOKUP_MAXCLASS ${lookup_maxclass}" - echo "#define SMALL_MAXCLASS ${small_maxclass}" - echo "#define LG_LARGE_MINCLASS ${lg_large_minclass}" - echo "#define HUGE_MAXCLASS ${huge_maxclass}" + echo "#define SIZE_CLASSES_DEFINED" + echo "#define NTBINS ${ntbins}" + echo "#define NLBINS ${nlbins}" + echo "#define NBINS ${nbins}" + echo "#define NSIZES ${nsizes}" + echo "#define LG_CEIL_NSIZES ${lg_ceil_nsizes}" + echo "#define NPSIZES ${npsizes}" + echo "#define LG_TINY_MAXCLASS ${lg_tiny_maxclass}" + echo "#define LOOKUP_MAXCLASS ${lookup_maxclass}" + echo "#define SMALL_MAXCLASS ${small_maxclass}" + echo "#define LG_LARGE_MINCLASS ${lg_large_minclass}" + echo "#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS)" + echo "#define LARGE_MAXCLASS ${large_maxclass}" echo "#endif" echo done @@ -290,29 +351,11 @@ cat <<EOF #undef SIZE_CLASSES_DEFINED /* * The size2index_tab lookup table uses uint8_t to encode each bin index, so we - * cannot support more than 256 small size classes. Further constrain NBINS to - * 255 since all small size classes, plus a "not small" size class must be - * stored in 8 bits of arena_chunk_map_bits_t's bits field. + * cannot support more than 256 small size classes. */ -#if (NBINS > 255) +#if (NBINS > 256) # error "Too many small size classes" #endif -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_SIZE_CLASSES_H */ EOF diff --git a/include/jemalloc/internal/smoothstep.h b/include/jemalloc/internal/smoothstep.h index c5333cc..2e14430 100644 --- a/include/jemalloc/internal/smoothstep.h +++ b/include/jemalloc/internal/smoothstep.h @@ -1,9 +1,11 @@ +#ifndef JEMALLOC_INTERNAL_SMOOTHSTEP_H +#define JEMALLOC_INTERNAL_SMOOTHSTEP_H + /* * This file was generated by the following command: * sh smoothstep.sh smoother 200 24 3 15 */ /******************************************************************************/ -#ifdef JEMALLOC_H_TYPES /* * This header defines a precomputed table based on the smoothstep family of @@ -21,10 +23,10 @@ * smootheststep(x) = -20x + 70x - 84x + 35x */ -#define SMOOTHSTEP_VARIANT "smoother" -#define SMOOTHSTEP_NSTEPS 200 -#define SMOOTHSTEP_BFP 24 -#define SMOOTHSTEP \ +#define SMOOTHSTEP_VARIANT "smoother" +#define SMOOTHSTEP_NSTEPS 200 +#define SMOOTHSTEP_BFP 24 +#define SMOOTHSTEP \ /* STEP(step, h, x, y) */ \ STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \ STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \ @@ -227,20 +229,4 @@ STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \ STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \ -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */ diff --git a/include/jemalloc/internal/smoothstep.sh b/include/jemalloc/internal/smoothstep.sh index 8124693..65de97b 100755 --- a/include/jemalloc/internal/smoothstep.sh +++ b/include/jemalloc/internal/smoothstep.sh @@ -54,12 +54,14 @@ smoothest() { } cat <<EOF +#ifndef JEMALLOC_INTERNAL_SMOOTHSTEP_H +#define JEMALLOC_INTERNAL_SMOOTHSTEP_H + /* * This file was generated by the following command: * $cmd */ /******************************************************************************/ -#ifdef JEMALLOC_H_TYPES /* * This header defines a precomputed table based on the smoothstep family of @@ -77,10 +79,10 @@ cat <<EOF * smootheststep(x) = -20x + 70x - 84x + 35x */ -#define SMOOTHSTEP_VARIANT "${variant}" -#define SMOOTHSTEP_NSTEPS ${nsteps} -#define SMOOTHSTEP_BFP ${bfp} -#define SMOOTHSTEP \\ +#define SMOOTHSTEP_VARIANT "${variant}" +#define SMOOTHSTEP_NSTEPS ${nsteps} +#define SMOOTHSTEP_BFP ${bfp} +#define SMOOTHSTEP \\ /* STEP(step, h, x, y) */ \\ EOF @@ -95,21 +97,5 @@ done echo cat <<EOF -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */ EOF diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h index 9ef5ceb..e2afc98 100644 --- a/include/jemalloc/internal/spin.h +++ b/include/jemalloc/internal/spin.h @@ -1,51 +1,36 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_SPIN_H +#define JEMALLOC_INTERNAL_SPIN_H -typedef struct spin_s spin_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct spin_s { - unsigned iteration; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void spin_init(spin_t *spin); -void spin_adaptive(spin_t *spin); +#ifdef JEMALLOC_SPIN_C_ +# define SPIN_INLINE extern inline +#else +# define SPIN_INLINE inline #endif -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_SPIN_C_)) -JEMALLOC_INLINE void -spin_init(spin_t *spin) -{ +#define SPIN_INITIALIZER {0U} - spin->iteration = 0; -} - -JEMALLOC_INLINE void -spin_adaptive(spin_t *spin) -{ - volatile uint64_t i; +typedef struct { + unsigned iteration; +} spin_t; - for (i = 0; i < (KQU(1) << spin->iteration); i++) - CPU_SPINWAIT; +SPIN_INLINE void +spin_adaptive(spin_t *spin) { + volatile uint32_t i; - if (spin->iteration < 63) + if (spin->iteration < 5) { + for (i = 0; i < (1U << spin->iteration); i++) { + CPU_SPINWAIT; + } spin->iteration++; -} - + } else { +#ifdef _WIN32 + SwitchToThread(); +#else + sched_yield(); #endif + } +} -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#undef SPIN_INLINE +#endif /* JEMALLOC_INTERNAL_SPIN_H */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 04e7dae..1198779 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -1,26 +1,51 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_STATS_H +#define JEMALLOC_INTERNAL_STATS_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats_tsd.h" + +/* OPTION(opt, var_name, default, set_value_to) */ +#define STATS_PRINT_OPTIONS \ + OPTION('J', json, false, true) \ + OPTION('g', general, true, false) \ + OPTION('m', merged, config_stats, false) \ + OPTION('d', destroyed, config_stats, false) \ + OPTION('a', unmerged, config_stats, false) \ + OPTION('b', bins, true, false) \ + OPTION('l', large, true, false) \ + OPTION('x', mutex, true, false) + +enum { +#define OPTION(o, v, d, s) stats_print_option_num_##v, + STATS_PRINT_OPTIONS +#undef OPTION + stats_print_tot_num_options +}; -typedef struct tcache_bin_stats_s tcache_bin_stats_t; -typedef struct malloc_bin_stats_s malloc_bin_stats_t; -typedef struct malloc_large_stats_s malloc_large_stats_t; -typedef struct malloc_huge_stats_s malloc_huge_stats_t; -typedef struct arena_stats_s arena_stats_t; -typedef struct chunk_stats_s chunk_stats_t; +/* Options for stats_print. */ +extern bool opt_stats_print; +extern char opt_stats_print_opts[stats_print_tot_num_options+1]; -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS +/* Implements je_malloc_stats_print. */ +void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, + const char *opts); -struct tcache_bin_stats_s { - /* - * Number of allocation requests that corresponded to the size of this - * bin. - */ - uint64_t nrequests; -}; +/* + * In those architectures that support 64-bit atomics, we use atomic updates for + * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize + * externally. + */ +#ifdef JEMALLOC_ATOMIC_U64 +typedef atomic_u64_t arena_stats_u64_t; +#else +/* Must hold the arena stats mutex while reading atomically. */ +typedef uint64_t arena_stats_u64_t; +#endif -struct malloc_bin_stats_s { +typedef struct malloc_bin_stats_s { /* * Total number of allocation/deallocation requests served directly by * the bin. Note that tcache may allocate an object, then recycle it @@ -49,149 +74,91 @@ struct malloc_bin_stats_s { /* Number of tcache flushes to this bin. */ uint64_t nflushes; - /* Total number of runs created for this bin's size class. */ - uint64_t nruns; + /* Total number of slabs created for this bin's size class. */ + uint64_t nslabs; /* - * Total number of runs reused by extracting them from the runs tree for - * this bin's size class. + * Total number of slabs reused by extracting them from the slabs heap + * for this bin's size class. */ - uint64_t reruns; + uint64_t reslabs; - /* Current number of runs in this bin. */ - size_t curruns; -}; + /* Current number of slabs in this bin. */ + size_t curslabs; + + mutex_prof_data_t mutex_data; +} malloc_bin_stats_t; -struct malloc_large_stats_s { +typedef struct malloc_large_stats_s { /* * Total number of allocation/deallocation requests served directly by - * the arena. Note that tcache may allocate an object, then recycle it - * many times, resulting many increments to nrequests, but only one - * each to nmalloc and ndalloc. + * the arena. */ - uint64_t nmalloc; - uint64_t ndalloc; + arena_stats_u64_t nmalloc; + arena_stats_u64_t ndalloc; /* * Number of allocation requests that correspond to this size class. * This includes requests served by tcache, though tcache only * periodically merges into this counter. */ - uint64_t nrequests; + arena_stats_u64_t nrequests; /* Partially derived. */ + + /* Current number of allocations of this size class. */ + size_t curlextents; /* Derived. */ +} malloc_large_stats_t; + +typedef struct decay_stats_s { + /* Total number of purge sweeps. */ + arena_stats_u64_t npurge; + /* Total number of madvise calls made. */ + arena_stats_u64_t nmadvise; + /* Total number of pages purged. */ + arena_stats_u64_t purged; +} decay_stats_t; + +/* + * Arena stats. Note that fields marked "derived" are not directly maintained + * within the arena code; rather their values are derived during stats merge + * requests. + */ +typedef struct arena_stats_s { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_t mtx; +#endif - /* - * Current number of runs of this size class, including runs currently - * cached by tcache. - */ - size_t curruns; -}; + /* Number of bytes currently mapped, excluding retained memory. */ + atomic_zu_t mapped; /* Partially derived. */ -struct malloc_huge_stats_s { /* - * Total number of allocation/deallocation requests served directly by - * the arena. + * Number of unused virtual memory bytes currently retained. Retained + * bytes are technically mapped (though always decommitted or purged), + * but they are excluded from the mapped statistic (above). */ - uint64_t nmalloc; - uint64_t ndalloc; - - /* Current number of (multi-)chunk allocations of this size class. */ - size_t curhchunks; -}; + atomic_zu_t retained; /* Derived. */ -struct arena_stats_s { - /* Number of bytes currently mapped. */ - size_t mapped; + decay_stats_t decay_dirty; + decay_stats_t decay_muzzy; - /* - * Number of bytes currently retained as a side effect of munmap() being - * disabled/bypassed. Retained bytes are technically mapped (though - * always decommitted or purged), but they are excluded from the mapped - * statistic (above). - */ - size_t retained; + atomic_zu_t base; /* Derived. */ + atomic_zu_t internal; + atomic_zu_t resident; /* Derived. */ - /* - * Total number of purge sweeps, total number of madvise calls made, - * and total pages purged in order to keep dirty unused memory under - * control. - */ - uint64_t npurge; - uint64_t nmadvise; - uint64_t purged; - - /* - * Number of bytes currently mapped purely for metadata purposes, and - * number of bytes currently allocated for internal metadata. - */ - size_t metadata_mapped; - size_t metadata_allocated; /* Protected via atomic_*_z(). */ + atomic_zu_t allocated_large; /* Derived. */ + arena_stats_u64_t nmalloc_large; /* Derived. */ + arena_stats_u64_t ndalloc_large; /* Derived. */ + arena_stats_u64_t nrequests_large; /* Derived. */ - /* Per-size-category statistics. */ - size_t allocated_large; - uint64_t nmalloc_large; - uint64_t ndalloc_large; - uint64_t nrequests_large; + /* Number of bytes cached in tcache associated with this arena. */ + atomic_zu_t tcache_bytes; /* Derived. */ - size_t allocated_huge; - uint64_t nmalloc_huge; - uint64_t ndalloc_huge; + mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; /* One element for each large size class. */ - malloc_large_stats_t *lstats; + malloc_large_stats_t lstats[NSIZES - NBINS]; - /* One element for each huge size class. */ - malloc_huge_stats_t *hstats; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_stats_print; - -extern size_t stats_cactive; - -void stats_print(void (*write)(void *, const char *), void *cbopaque, - const char *opts); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -size_t stats_cactive_get(void); -void stats_cactive_add(size_t size); -void stats_cactive_sub(size_t size); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_)) -JEMALLOC_INLINE size_t -stats_cactive_get(void) -{ - - return (atomic_read_z(&stats_cactive)); -} - -JEMALLOC_INLINE void -stats_cactive_add(size_t size) -{ - - assert(size > 0); - assert((size & chunksize_mask) == 0); - - atomic_add_z(&stats_cactive, size); -} - -JEMALLOC_INLINE void -stats_cactive_sub(size_t size) -{ - - assert(size > 0); - assert((size & chunksize_mask) == 0); - - atomic_sub_z(&stats_cactive, size); -} -#endif + /* Arena uptime. */ + nstime_t uptime; +} arena_stats_t; -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_STATS_H */ diff --git a/include/jemalloc/internal/stats_tsd.h b/include/jemalloc/internal/stats_tsd.h new file mode 100644 index 0000000..d0c3bbe --- /dev/null +++ b/include/jemalloc/internal/stats_tsd.h @@ -0,0 +1,12 @@ +#ifndef JEMALLOC_INTERNAL_STATS_TSD_H +#define JEMALLOC_INTERNAL_STATS_TSD_H + +typedef struct tcache_bin_stats_s { + /* + * Number of allocation requests that corresponded to the size of this + * bin. + */ + uint64_t nrequests; +} tcache_bin_stats_t; + +#endif /* JEMALLOC_INTERNAL_STATS_TSD_H */ diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h new file mode 100644 index 0000000..7f640d5 --- /dev/null +++ b/include/jemalloc/internal/sz.h @@ -0,0 +1,317 @@ +#ifndef JEMALLOC_INTERNAL_SIZE_H +#define JEMALLOC_INTERNAL_SIZE_H + +#include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/pages.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/util.h" + +/* + * sz module: Size computations. + * + * Some abbreviations used here: + * p: Page + * ind: Index + * s, sz: Size + * u: Usable size + * a: Aligned + * + * These are not always used completely consistently, but should be enough to + * interpret function names. E.g. sz_psz2ind converts page size to page size + * index; sz_sa2u converts a (size, alignment) allocation request to the usable + * size that would result from such an allocation. + */ + +/* + * sz_pind2sz_tab encodes the same information as could be computed by + * sz_pind2sz_compute(). + */ +extern size_t const sz_pind2sz_tab[NPSIZES+1]; +/* + * sz_index2size_tab encodes the same information as could be computed (at + * unacceptable cost in some code paths) by sz_index2size_compute(). + */ +extern size_t const sz_index2size_tab[NSIZES]; +/* + * sz_size2index_tab is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via sz_size2index(). + */ +extern uint8_t const sz_size2index_tab[]; + +static const size_t sz_large_pad = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + PAGE +#else + 0 +#endif + ; + +JEMALLOC_ALWAYS_INLINE pszind_t +sz_psz2ind(size_t psz) { + if (unlikely(psz > LARGE_MAXCLASS)) { + return NPSIZES; + } + { + pszind_t x = lg_floor((psz<<1)-1); + pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - + (LG_SIZE_CLASS_GROUP + LG_PAGE); + pszind_t grp = shift << LG_SIZE_CLASS_GROUP; + + pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZD(-1) << lg_delta; + pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + pszind_t ind = grp + mod; + return ind; + } +} + +static inline size_t +sz_pind2sz_compute(pszind_t pind) { + if (unlikely(pind == NPSIZES)) { + return LARGE_MAXCLASS + PAGE; + } + { + size_t grp = pind >> LG_SIZE_CLASS_GROUP; + size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_PAGE + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_PAGE-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t sz = grp_size + mod_size; + return sz; + } +} + +static inline size_t +sz_pind2sz_lookup(pszind_t pind) { + size_t ret = (size_t)sz_pind2sz_tab[pind]; + assert(ret == sz_pind2sz_compute(pind)); + return ret; +} + +static inline size_t +sz_pind2sz(pszind_t pind) { + assert(pind < NPSIZES+1); + return sz_pind2sz_lookup(pind); +} + +static inline size_t +sz_psz2u(size_t psz) { + if (unlikely(psz > LARGE_MAXCLASS)) { + return LARGE_MAXCLASS + PAGE; + } + { + size_t x = lg_floor((psz<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (psz + delta_mask) & ~delta_mask; + return usize; + } +} + +static inline szind_t +sz_size2index_compute(size_t size) { + if (unlikely(size > LARGE_MAXCLASS)) { + return NSIZES; + } +#if (NTBINS != 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); + return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); + } +#endif + { + szind_t x = lg_floor((size<<1)-1); + szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); + szind_t grp = shift << LG_SIZE_CLASS_GROUP; + + szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZD(-1) << lg_delta; + szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + szind_t index = NTBINS + grp + mod; + return index; + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +sz_size2index_lookup(size_t size) { + assert(size <= LOOKUP_MAXCLASS); + { + szind_t ret = (sz_size2index_tab[(size-1) >> LG_TINY_MIN]); + assert(ret == sz_size2index_compute(size)); + return ret; + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +sz_size2index(size_t size) { + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) { + return sz_size2index_lookup(size); + } + return sz_size2index_compute(size); +} + +static inline size_t +sz_index2size_compute(szind_t index) { +#if (NTBINS > 0) + if (index < NTBINS) { + return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); + } +#endif + { + size_t reduced_index = index - NTBINS; + size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; + size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_QUANTUM-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t usize = grp_size + mod_size; + return usize; + } +} + +JEMALLOC_ALWAYS_INLINE size_t +sz_index2size_lookup(szind_t index) { + size_t ret = (size_t)sz_index2size_tab[index]; + assert(ret == sz_index2size_compute(index)); + return ret; +} + +JEMALLOC_ALWAYS_INLINE size_t +sz_index2size(szind_t index) { + assert(index < NSIZES); + return sz_index2size_lookup(index); +} + +JEMALLOC_ALWAYS_INLINE size_t +sz_s2u_compute(size_t size) { + if (unlikely(size > LARGE_MAXCLASS)) { + return 0; + } +#if (NTBINS > 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); + return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : + (ZU(1) << lg_ceil)); + } +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return usize; + } +} + +JEMALLOC_ALWAYS_INLINE size_t +sz_s2u_lookup(size_t size) { + size_t ret = sz_index2size_lookup(sz_size2index_lookup(size)); + + assert(ret == sz_s2u_compute(size)); + return ret; +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size. + */ +JEMALLOC_ALWAYS_INLINE size_t +sz_s2u(size_t size) { + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) { + return sz_s2u_lookup(size); + } + return sz_s2u_compute(size); +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size and alignment. + */ +JEMALLOC_ALWAYS_INLINE size_t +sz_sa2u(size_t size, size_t alignment) { + size_t usize; + + assert(alignment != 0 && ((alignment - 1) & alignment) == 0); + + /* Try for a small size class. */ + if (size <= SMALL_MAXCLASS && alignment < PAGE) { + /* + * Round size up to the nearest multiple of alignment. + * + * This done, we can take advantage of the fact that for each + * small size class, every object is aligned at the smallest + * power of two that is non-zero in the base two representation + * of the size. For example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + */ + usize = sz_s2u(ALIGNMENT_CEILING(size, alignment)); + if (usize < LARGE_MINCLASS) { + return usize; + } + } + + /* Large size class. Beware of overflow. */ + + if (unlikely(alignment > LARGE_MAXCLASS)) { + return 0; + } + + /* Make sure result is a large size class. */ + if (size <= LARGE_MINCLASS) { + usize = LARGE_MINCLASS; + } else { + usize = sz_s2u(size); + if (usize < size) { + /* size_t overflow. */ + return 0; + } + } + + /* + * Calculate the multi-page mapping that large_palloc() would need in + * order to guarantee the alignment. + */ + if (usize + sz_large_pad + PAGE_CEILING(alignment) - PAGE < usize) { + /* size_t overflow. */ + return 0; + } + return usize; +} + +#endif /* JEMALLOC_INTERNAL_SIZE_H */ diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h deleted file mode 100644 index 5fe5ebf..0000000 --- a/include/jemalloc/internal/tcache.h +++ /dev/null @@ -1,472 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct tcache_bin_info_s tcache_bin_info_t; -typedef struct tcache_bin_s tcache_bin_t; -typedef struct tcache_s tcache_t; -typedef struct tcaches_s tcaches_t; - -/* - * tcache pointers close to NULL are used to encode state information that is - * used for two purposes: preventing thread caching on a per thread basis and - * cleaning up during thread shutdown. - */ -#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) -#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) -#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) -#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY - -/* - * Absolute minimum number of cache slots for each small bin. - */ -#define TCACHE_NSLOTS_SMALL_MIN 20 - -/* - * Absolute maximum number of cache slots for each small bin in the thread - * cache. This is an additional constraint beyond that imposed as: twice the - * number of regions per run for this size class. - * - * This constant must be an even number. - */ -#define TCACHE_NSLOTS_SMALL_MAX 200 - -/* Number of cache slots for large size classes. */ -#define TCACHE_NSLOTS_LARGE 20 - -/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ -#define LG_TCACHE_MAXCLASS_DEFAULT 15 - -/* - * TCACHE_GC_SWEEP is the approximate number of allocation events between - * full GC sweeps. Integer rounding may cause the actual number to be - * slightly higher, since GC is performed incrementally. - */ -#define TCACHE_GC_SWEEP 8192 - -/* Number of tcache allocation/deallocation events between incremental GCs. */ -#define TCACHE_GC_INCR \ - ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -typedef enum { - tcache_enabled_false = 0, /* Enable cast to/from bool. */ - tcache_enabled_true = 1, - tcache_enabled_default = 2 -} tcache_enabled_t; - -/* - * Read-only information associated with each element of tcache_t's tbins array - * is stored separately, mainly to reduce memory usage. - */ -struct tcache_bin_info_s { - unsigned ncached_max; /* Upper limit on ncached. */ -}; - -struct tcache_bin_s { - tcache_bin_stats_t tstats; - int low_water; /* Min # cached since last GC. */ - unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ - unsigned ncached; /* # of cached objects. */ - /* - * To make use of adjacent cacheline prefetch, the items in the avail - * stack goes to higher address for newer allocations. avail points - * just above the available space, which means that - * avail[-ncached, ... -1] are available items and the lowest item will - * be allocated first. - */ - void **avail; /* Stack of available objects. */ -}; - -struct tcache_s { - ql_elm(tcache_t) link; /* Used for aggregating stats. */ - uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - ticker_t gc_ticker; /* Drives incremental GC. */ - szind_t next_gc_bin; /* Next bin to GC. */ - tcache_bin_t tbins[1]; /* Dynamically sized. */ - /* - * The pointer stacks associated with tbins follow as a contiguous - * array. During tcache initialization, the avail pointer in each - * element of tbins is initialized to point to the proper offset within - * this array. - */ -}; - -/* Linkage for list of available (previously used) explicit tcache IDs. */ -struct tcaches_s { - union { - tcache_t *tcache; - tcaches_t *next; - }; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_tcache; -extern ssize_t opt_lg_tcache_max; - -extern tcache_bin_info_t *tcache_bin_info; - -/* - * Number of tcache bins. There are NBINS small-object bins, plus 0 or more - * large-object bins. - */ -extern unsigned nhbins; - -/* Maximum cached size class. */ -extern size_t tcache_maxclass; - -/* - * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and - * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are - * completely disjoint from this data structure. tcaches starts off as a sparse - * array, so it has no physical memory footprint until individual pages are - * touched. This allows the entire array to be allocated the first time an - * explicit tcache is created without a disproportionate impact on memory usage. - */ -extern tcaches_t *tcaches; - -size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); -void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success); -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, - unsigned rem, tcache_t *tcache); -void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, - arena_t *oldarena, arena_t *newarena); -tcache_t *tcache_get_hard(tsd_t *tsd); -tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); -void tcache_cleanup(tsd_t *tsd); -void tcache_enabled_cleanup(tsd_t *tsd); -void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsd_t *tsd, unsigned *r_ind); -void tcaches_flush(tsd_t *tsd, unsigned ind); -void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn); -void tcache_prefork(tsdn_t *tsdn); -void tcache_postfork_parent(tsdn_t *tsdn); -void tcache_postfork_child(tsdn_t *tsdn); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void tcache_event(tsd_t *tsd, tcache_t *tcache); -void tcache_flush(void); -bool tcache_enabled_get(void); -tcache_t *tcache_get(tsd_t *tsd, bool create); -void tcache_enabled_set(bool enabled); -void *tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success); -void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, szind_t ind, bool zero, bool slow_path); -void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, szind_t ind, bool zero, bool slow_path); -void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, - szind_t binind, bool slow_path); -void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, - size_t size, bool slow_path); -tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) -JEMALLOC_INLINE void -tcache_flush(void) -{ - tsd_t *tsd; - - cassert(config_tcache); - - tsd = tsd_fetch(); - tcache_cleanup(tsd); -} - -JEMALLOC_INLINE bool -tcache_enabled_get(void) -{ - tsd_t *tsd; - tcache_enabled_t tcache_enabled; - - cassert(config_tcache); - - tsd = tsd_fetch(); - tcache_enabled = tsd_tcache_enabled_get(tsd); - if (tcache_enabled == tcache_enabled_default) { - tcache_enabled = (tcache_enabled_t)opt_tcache; - tsd_tcache_enabled_set(tsd, tcache_enabled); - } - - return ((bool)tcache_enabled); -} - -JEMALLOC_INLINE void -tcache_enabled_set(bool enabled) -{ - tsd_t *tsd; - tcache_enabled_t tcache_enabled; - - cassert(config_tcache); - - tsd = tsd_fetch(); - - tcache_enabled = (tcache_enabled_t)enabled; - tsd_tcache_enabled_set(tsd, tcache_enabled); - - if (!enabled) - tcache_cleanup(tsd); -} - -JEMALLOC_ALWAYS_INLINE tcache_t * -tcache_get(tsd_t *tsd, bool create) -{ - tcache_t *tcache; - - if (!config_tcache) - return (NULL); - - tcache = tsd_tcache_get(tsd); - if (!create) - return (tcache); - if (unlikely(tcache == NULL) && tsd_nominal(tsd)) { - tcache = tcache_get_hard(tsd); - tsd_tcache_set(tsd, tcache); - } - - return (tcache); -} - -JEMALLOC_ALWAYS_INLINE void -tcache_event(tsd_t *tsd, tcache_t *tcache) -{ - - if (TCACHE_GC_INCR == 0) - return; - - if (unlikely(ticker_tick(&tcache->gc_ticker))) - tcache_event_hard(tsd, tcache); -} - -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) -{ - void *ret; - - if (unlikely(tbin->ncached == 0)) { - tbin->low_water = -1; - *tcache_success = false; - return (NULL); - } - /* - * tcache_success (instead of ret) should be checked upon the return of - * this function. We avoid checking (ret == NULL) because there is - * never a null stored on the avail stack (which is unknown to the - * compiler), and eagerly checking ret would cause pipeline stall - * (waiting for the cacheline). - */ - *tcache_success = true; - ret = *(tbin->avail - tbin->ncached); - tbin->ncached--; - - if (unlikely((int)tbin->ncached < tbin->low_water)) - tbin->low_water = tbin->ncached; - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - szind_t binind, bool zero, bool slow_path) -{ - void *ret; - tcache_bin_t *tbin; - bool tcache_success; - size_t usize JEMALLOC_CC_SILENCE_INIT(0); - - assert(binind < NBINS); - tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin, &tcache_success); - assert(tcache_success == (ret != NULL)); - if (unlikely(!tcache_success)) { - bool tcache_hard_success; - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); - - ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, - tbin, binind, &tcache_hard_success); - if (tcache_hard_success == false) - return (NULL); - } - - assert(ret); - /* - * Only compute usize if required. The checks in the following if - * statement are all static. - */ - if (config_prof || (slow_path && config_fill) || unlikely(zero)) { - usize = index2size(binind); - assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); - } - - if (likely(!zero)) { - if (slow_path && config_fill) { - if (unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, - &arena_bin_info[binind], false); - } else if (unlikely(opt_zero)) - memset(ret, 0, usize); - } - } else { - if (slow_path && config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &arena_bin_info[binind], - true); - } - memset(ret, 0, usize); - } - - if (config_stats) - tbin->tstats.nrequests++; - if (config_prof) - tcache->prof_accumbytes += usize; - tcache_event(tsd, tcache); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - szind_t binind, bool zero, bool slow_path) -{ - void *ret; - tcache_bin_t *tbin; - bool tcache_success; - - assert(binind < nhbins); - tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin, &tcache_success); - assert(tcache_success == (ret != NULL)); - if (unlikely(!tcache_success)) { - /* - * Only allocate one large object at a time, because it's quite - * expensive to create one and not use it. - */ - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); - - ret = arena_malloc_large(tsd_tsdn(tsd), arena, binind, zero); - if (ret == NULL) - return (NULL); - } else { - size_t usize JEMALLOC_CC_SILENCE_INIT(0); - - /* Only compute usize on demand */ - if (config_prof || (slow_path && config_fill) || - unlikely(zero)) { - usize = index2size(binind); - assert(usize <= tcache_maxclass); - } - - if (config_prof && usize == LARGE_MINCLASS) { - arena_chunk_t *chunk = - (arena_chunk_t *)CHUNK_ADDR2BASE(ret); - size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> - LG_PAGE); - arena_mapbits_large_binind_set(chunk, pageind, - BININD_INVALID); - } - if (likely(!zero)) { - if (slow_path && config_fill) { - if (unlikely(opt_junk_alloc)) { - memset(ret, JEMALLOC_ALLOC_JUNK, - usize); - } else if (unlikely(opt_zero)) - memset(ret, 0, usize); - } - } else - memset(ret, 0, usize); - - if (config_stats) - tbin->tstats.nrequests++; - if (config_prof) - tcache->prof_accumbytes += usize; - } - - tcache_event(tsd, tcache); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, - bool slow_path) -{ - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; - - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); - - if (slow_path && config_fill && unlikely(opt_junk_free)) - arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); - - tbin = &tcache->tbins[binind]; - tbin_info = &tcache_bin_info[binind]; - if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_small(tsd, tcache, tbin, binind, - (tbin_info->ncached_max >> 1)); - } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->ncached++; - *(tbin->avail - tbin->ncached) = ptr; - - tcache_event(tsd, tcache); -} - -JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, - bool slow_path) -{ - szind_t binind; - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; - - assert((size & PAGE_MASK) == 0); - assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); - - binind = size2index(size); - - if (slow_path && config_fill && unlikely(opt_junk_free)) - arena_dalloc_junk_large(ptr, size); - - tbin = &tcache->tbins[binind]; - tbin_info = &tcache_bin_info[binind]; - if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_large(tsd, tbin, binind, - (tbin_info->ncached_max >> 1), tcache); - } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->ncached++; - *(tbin->avail - tbin->ncached) = ptr; - - tcache_event(tsd, tcache); -} - -JEMALLOC_ALWAYS_INLINE tcache_t * -tcaches_get(tsd_t *tsd, unsigned ind) -{ - tcaches_t *elm = &tcaches[ind]; - if (unlikely(elm->tcache == NULL)) { - elm->tcache = tcache_create(tsd_tsdn(tsd), arena_choose(tsd, - NULL)); - } - return (elm->tcache); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h new file mode 100644 index 0000000..abe133f --- /dev/null +++ b/include/jemalloc/internal/tcache_externs.h @@ -0,0 +1,55 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H +#define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H + +#include "jemalloc/internal/size_classes.h" + +extern bool opt_tcache; +extern ssize_t opt_lg_tcache_max; + +extern tcache_bin_info_t *tcache_bin_info; + +/* + * Number of tcache bins. There are NBINS small-object bins, plus 0 or more + * large-object bins. + */ +extern unsigned nhbins; + +/* Maximum cached size class. */ +extern size_t tcache_maxclass; + +/* + * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and + * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are + * completely disjoint from this data structure. tcaches starts off as a sparse + * array, so it has no physical memory footprint until individual pages are + * touched. This allows the entire array to be allocated the first time an + * explicit tcache is created without a disproportionate impact on memory usage. + */ +extern tcaches_t *tcaches; + +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); +void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind, bool *tcache_success); +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, + unsigned rem, tcache_t *tcache); +void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, + arena_t *arena); +tcache_t *tcache_create_explicit(tsd_t *tsd); +void tcache_cleanup(tsd_t *tsd); +void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(tsdn_t *tsdn); +void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +void tcache_prefork(tsdn_t *tsdn); +void tcache_postfork_parent(tsdn_t *tsdn); +void tcache_postfork_child(tsdn_t *tsdn); +void tcache_flush(void); +bool tsd_tcache_data_init(tsd_t *tsd); +bool tsd_tcache_enabled_data_init(tsd_t *tsd); + +#endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h new file mode 100644 index 0000000..c55bcd2 --- /dev/null +++ b/include/jemalloc/internal/tcache_inlines.h @@ -0,0 +1,250 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H +#define JEMALLOC_INTERNAL_TCACHE_INLINES_H + +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/ticker.h" +#include "jemalloc/internal/util.h" + +static inline bool +tcache_enabled_get(tsd_t *tsd) { + return tsd_tcache_enabled_get(tsd); +} + +static inline void +tcache_enabled_set(tsd_t *tsd, bool enabled) { + bool was_enabled = tsd_tcache_enabled_get(tsd); + + if (!was_enabled && enabled) { + tsd_tcache_data_init(tsd); + } else if (was_enabled && !enabled) { + tcache_cleanup(tsd); + } + /* Commit the state last. Above calls check current state. */ + tsd_tcache_enabled_set(tsd, enabled); + tsd_slow_update(tsd); +} + +JEMALLOC_ALWAYS_INLINE void +tcache_event(tsd_t *tsd, tcache_t *tcache) { + if (TCACHE_GC_INCR == 0) { + return; + } + + if (unlikely(ticker_tick(&tcache->gc_ticker))) { + tcache_event_hard(tsd, tcache); + } +} + +JEMALLOC_ALWAYS_INLINE void * +tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) { + void *ret; + + if (unlikely(tbin->ncached == 0)) { + tbin->low_water = -1; + *tcache_success = false; + return NULL; + } + /* + * tcache_success (instead of ret) should be checked upon the return of + * this function. We avoid checking (ret == NULL) because there is + * never a null stored on the avail stack (which is unknown to the + * compiler), and eagerly checking ret would cause pipeline stall + * (waiting for the cacheline). + */ + *tcache_success = true; + ret = *(tbin->avail - tbin->ncached); + tbin->ncached--; + + if (unlikely((low_water_t)tbin->ncached < tbin->low_water)) { + tbin->low_water = tbin->ncached; + } + + return ret; +} + +JEMALLOC_ALWAYS_INLINE void * +tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + szind_t binind, bool zero, bool slow_path) { + void *ret; + tcache_bin_t *tbin; + bool tcache_success; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + + assert(binind < NBINS); + tbin = tcache_small_bin_get(tcache, binind); + ret = tcache_alloc_easy(tbin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { + bool tcache_hard_success; + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) { + return NULL; + } + + ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, + tbin, binind, &tcache_hard_success); + if (tcache_hard_success == false) { + return NULL; + } + } + + assert(ret); + /* + * Only compute usize if required. The checks in the following if + * statement are all static. + */ + if (config_prof || (slow_path && config_fill) || unlikely(zero)) { + usize = sz_index2size(binind); + assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); + } + + if (likely(!zero)) { + if (slow_path && config_fill) { + if (unlikely(opt_junk_alloc)) { + arena_alloc_junk_small(ret, + &arena_bin_info[binind], false); + } else if (unlikely(opt_zero)) { + memset(ret, 0, usize); + } + } + } else { + if (slow_path && config_fill && unlikely(opt_junk_alloc)) { + arena_alloc_junk_small(ret, &arena_bin_info[binind], + true); + } + memset(ret, 0, usize); + } + + if (config_stats) { + tbin->tstats.nrequests++; + } + if (config_prof) { + tcache->prof_accumbytes += usize; + } + tcache_event(tsd, tcache); + return ret; +} + +JEMALLOC_ALWAYS_INLINE void * +tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + szind_t binind, bool zero, bool slow_path) { + void *ret; + tcache_bin_t *tbin; + bool tcache_success; + + assert(binind >= NBINS &&binind < nhbins); + tbin = tcache_large_bin_get(tcache, binind); + ret = tcache_alloc_easy(tbin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { + /* + * Only allocate one large object at a time, because it's quite + * expensive to create one and not use it. + */ + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) { + return NULL; + } + + ret = large_malloc(tsd_tsdn(tsd), arena, sz_s2u(size), zero); + if (ret == NULL) { + return NULL; + } + } else { + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + + /* Only compute usize on demand */ + if (config_prof || (slow_path && config_fill) || + unlikely(zero)) { + usize = sz_index2size(binind); + assert(usize <= tcache_maxclass); + } + + if (likely(!zero)) { + if (slow_path && config_fill) { + if (unlikely(opt_junk_alloc)) { + memset(ret, JEMALLOC_ALLOC_JUNK, + usize); + } else if (unlikely(opt_zero)) { + memset(ret, 0, usize); + } + } + } else { + memset(ret, 0, usize); + } + + if (config_stats) { + tbin->tstats.nrequests++; + } + if (config_prof) { + tcache->prof_accumbytes += usize; + } + } + + tcache_event(tsd, tcache); + return ret; +} + +JEMALLOC_ALWAYS_INLINE void +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, + bool slow_path) { + tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; + + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); + + if (slow_path && config_fill && unlikely(opt_junk_free)) { + arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); + } + + tbin = tcache_small_bin_get(tcache, binind); + tbin_info = &tcache_bin_info[binind]; + if (unlikely(tbin->ncached == tbin_info->ncached_max)) { + tcache_bin_flush_small(tsd, tcache, tbin, binind, + (tbin_info->ncached_max >> 1)); + } + assert(tbin->ncached < tbin_info->ncached_max); + tbin->ncached++; + *(tbin->avail - tbin->ncached) = ptr; + + tcache_event(tsd, tcache); +} + +JEMALLOC_ALWAYS_INLINE void +tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, + bool slow_path) { + tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; + + assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); + + if (slow_path && config_fill && unlikely(opt_junk_free)) { + large_dalloc_junk(ptr, sz_index2size(binind)); + } + + tbin = tcache_large_bin_get(tcache, binind); + tbin_info = &tcache_bin_info[binind]; + if (unlikely(tbin->ncached == tbin_info->ncached_max)) { + tcache_bin_flush_large(tsd, tbin, binind, + (tbin_info->ncached_max >> 1), tcache); + } + assert(tbin->ncached < tbin_info->ncached_max); + tbin->ncached++; + *(tbin->avail - tbin->ncached) = ptr; + + tcache_event(tsd, tcache); +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcaches_get(tsd_t *tsd, unsigned ind) { + tcaches_t *elm = &tcaches[ind]; + if (unlikely(elm->tcache == NULL)) { + elm->tcache = tcache_create_explicit(tsd); + } + return elm->tcache; +} + +#endif /* JEMALLOC_INTERNAL_TCACHE_INLINES_H */ diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h new file mode 100644 index 0000000..7eb516f --- /dev/null +++ b/include/jemalloc/internal/tcache_structs.h @@ -0,0 +1,64 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H +#define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H + +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats_tsd.h" +#include "jemalloc/internal/ticker.h" + +/* + * Read-only information associated with each element of tcache_t's tbins array + * is stored separately, mainly to reduce memory usage. + */ +struct tcache_bin_info_s { + unsigned ncached_max; /* Upper limit on ncached. */ +}; + +struct tcache_bin_s { + low_water_t low_water; /* Min # cached since last GC. */ + uint32_t ncached; /* # of cached objects. */ + /* + * ncached and stats are both modified frequently. Let's keep them + * close so that they have a higher chance of being on the same + * cacheline, thus less write-backs. + */ + tcache_bin_stats_t tstats; + /* + * To make use of adjacent cacheline prefetch, the items in the avail + * stack goes to higher address for newer allocations. avail points + * just above the available space, which means that + * avail[-ncached, ... -1] are available items and the lowest item will + * be allocated first. + */ + void **avail; /* Stack of available objects. */ +}; + +struct tcache_s { + /* Data accessed frequently first: prof, ticker and small bins. */ + uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ + ticker_t gc_ticker; /* Drives incremental GC. */ + /* + * The pointer stacks associated with tbins follow as a contiguous + * array. During tcache initialization, the avail pointer in each + * element of tbins is initialized to point to the proper offset within + * this array. + */ + tcache_bin_t tbins_small[NBINS]; + /* Data accessed less often below. */ + ql_elm(tcache_t) link; /* Used for aggregating stats. */ + arena_t *arena; /* Associated arena. */ + szind_t next_gc_bin; /* Next bin to GC. */ + /* For small bins, fill (ncached_max >> lg_fill_div). */ + uint8_t lg_fill_div[NBINS]; + tcache_bin_t tbins_large[NSIZES-NBINS]; +}; + +/* Linkage for list of available (previously used) explicit tcache IDs. */ +struct tcaches_s { + union { + tcache_t *tcache; + tcaches_t *next; + }; +}; + +#endif /* JEMALLOC_INTERNAL_TCACHE_STRUCTS_H */ diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h new file mode 100644 index 0000000..1155d62 --- /dev/null +++ b/include/jemalloc/internal/tcache_types.h @@ -0,0 +1,61 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H +#define JEMALLOC_INTERNAL_TCACHE_TYPES_H + +#include "jemalloc/internal/size_classes.h" + +typedef struct tcache_bin_info_s tcache_bin_info_t; +typedef struct tcache_bin_s tcache_bin_t; +typedef struct tcache_s tcache_t; +typedef struct tcaches_s tcaches_t; + +/* ncached is cast to this type for comparison. */ +typedef int32_t low_water_t; + +/* + * tcache pointers close to NULL are used to encode state information that is + * used for two purposes: preventing thread caching on a per thread basis and + * cleaning up during thread shutdown. + */ +#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) +#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) +#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) +#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY + +/* + * Absolute minimum number of cache slots for each small bin. + */ +#define TCACHE_NSLOTS_SMALL_MIN 20 + +/* + * Absolute maximum number of cache slots for each small bin in the thread + * cache. This is an additional constraint beyond that imposed as: twice the + * number of regions per slab for this size class. + * + * This constant must be an even number. + */ +#define TCACHE_NSLOTS_SMALL_MAX 200 + +/* Number of cache slots for large size classes. */ +#define TCACHE_NSLOTS_LARGE 20 + +/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ +#define LG_TCACHE_MAXCLASS_DEFAULT 15 + +/* + * TCACHE_GC_SWEEP is the approximate number of allocation events between + * full GC sweeps. Integer rounding may cause the actual number to be + * slightly higher, since GC is performed incrementally. + */ +#define TCACHE_GC_SWEEP 8192 + +/* Number of tcache allocation/deallocation events between incremental GCs. */ +#define TCACHE_GC_INCR \ + ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) + +/* Used in TSD static initializer only. Real init in tcache_data_init(). */ +#define TCACHE_ZERO_INITIALIZER {0} + +/* Used in TSD static initializer only. Will be initialized to opt_tcache. */ +#define TCACHE_ENABLED_ZERO_INITIALIZER false + +#endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h index 4696e56..572b964 100644 --- a/include/jemalloc/internal/ticker.h +++ b/include/jemalloc/internal/ticker.h @@ -1,75 +1,50 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct ticker_s ticker_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct ticker_s { - int32_t tick; - int32_t nticks; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void ticker_init(ticker_t *ticker, int32_t nticks); -void ticker_copy(ticker_t *ticker, const ticker_t *other); -int32_t ticker_read(const ticker_t *ticker); -bool ticker_ticks(ticker_t *ticker, int32_t nticks); -bool ticker_tick(ticker_t *ticker); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TICKER_C_)) -JEMALLOC_INLINE void -ticker_init(ticker_t *ticker, int32_t nticks) -{ - +#ifndef JEMALLOC_INTERNAL_TICKER_H +#define JEMALLOC_INTERNAL_TICKER_H + +#include "jemalloc/internal/util.h" + +/** + * A ticker makes it easy to count-down events until some limit. You + * ticker_init the ticker to trigger every nticks events. You then notify it + * that an event has occurred with calls to ticker_tick (or that nticks events + * have occurred with a call to ticker_ticks), which will return true (and reset + * the counter) if the countdown hit zero. + */ + +typedef struct { + int32_t tick; + int32_t nticks; +} ticker_t; + +static inline void +ticker_init(ticker_t *ticker, int32_t nticks) { ticker->tick = nticks; ticker->nticks = nticks; } -JEMALLOC_INLINE void -ticker_copy(ticker_t *ticker, const ticker_t *other) -{ - +static inline void +ticker_copy(ticker_t *ticker, const ticker_t *other) { *ticker = *other; } -JEMALLOC_INLINE int32_t -ticker_read(const ticker_t *ticker) -{ - - return (ticker->tick); +static inline int32_t +ticker_read(const ticker_t *ticker) { + return ticker->tick; } -JEMALLOC_INLINE bool -ticker_ticks(ticker_t *ticker, int32_t nticks) -{ - +static inline bool +ticker_ticks(ticker_t *ticker, int32_t nticks) { if (unlikely(ticker->tick < nticks)) { ticker->tick = ticker->nticks; - return (true); + return true; } ticker->tick -= nticks; return(false); } -JEMALLOC_INLINE bool -ticker_tick(ticker_t *ticker) -{ - - return (ticker_ticks(ticker, 1)); +static inline bool +ticker_tick(ticker_t *ticker) { + return ticker_ticks(ticker, 1); } -#endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_TICKER_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 9f37433..631fbf1 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -1,631 +1,122 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* Maximum number of malloc_tsd users with cleanup functions. */ -#define MALLOC_TSD_CLEANUPS_MAX 2 - -typedef bool (*malloc_tsd_cleanup_t)(void); - -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -typedef struct tsd_init_block_s tsd_init_block_t; -typedef struct tsd_init_head_s tsd_init_head_t; -#endif - -typedef struct tsd_s tsd_t; -typedef struct tsdn_s tsdn_t; - -#define TSDN_NULL ((tsdn_t *)0) - -typedef enum { - tsd_state_uninitialized, - tsd_state_nominal, - tsd_state_purgatory, - tsd_state_reincarnated -} tsd_state_t; +#ifndef JEMALLOC_INTERNAL_TSD_H +#define JEMALLOC_INTERNAL_TSD_H + +#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/prof_types.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/rtree_tsd.h" +#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/witness.h" /* - * TLS/TSD-agnostic macro-based implementation of thread-specific data. There - * are five macros that support (at least) three use cases: file-private, - * library-private, and library-private inlined. Following is an example - * library-private tsd variable: - * - * In example.h: - * typedef struct { - * int x; - * int y; - * } example_t; - * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) - * malloc_tsd_types(example_, example_t) - * malloc_tsd_protos(, example_, example_t) - * malloc_tsd_externs(example_, example_t) - * In example.c: - * malloc_tsd_data(, example_, example_t, EX_INITIALIZER) - * malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER, - * example_tsd_cleanup) - * - * The result is a set of generated functions, e.g.: + * Thread-Specific-Data layout + * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof --- + * s: state + * e: tcache_enabled + * m: thread_allocated (config_stats) + * f: thread_deallocated (config_stats) + * p: prof_tdata (config_prof) + * c: rtree_ctx (rtree cache accessed on deallocation) + * t: tcache + * --- data not accessed on tcache fast path: arena-related fields --- + * d: arenas_tdata_bypass + * r: reentrancy_level + * x: narenas_tdata + * i: iarena + * a: arena + * o: arenas_tdata + * Loading TSD data is on the critical path of basically all malloc operations. + * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective. + * Use a compact layout to reduce cache footprint. + * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ + * |---------------------------- 1st cacheline ----------------------------| + * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] | + * |---------------------------- 2nd cacheline ----------------------------| + * | [c * 64 ........ ........ ........ ........ ........ ........ .......] | + * |---------------------------- 3nd cacheline ----------------------------| + * | [c * 32 ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... | + * +-------------------------------------------------------------------------+ + * Note: the entire tcache is embedded into TSD and spans multiple cachelines. * - * bool example_tsd_boot(void) {...} - * bool example_tsd_booted_get(void) {...} - * example_t *example_tsd_get(bool init) {...} - * void example_tsd_set(example_t *val) {...} - * - * Note that all of the functions deal in terms of (a_type *) rather than - * (a_type) so that it is possible to support non-pointer types (unlike - * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is - * cast to (void *). This means that the cleanup function needs to cast the - * function argument to (a_type *), then dereference the resulting pointer to - * access fields, e.g. - * - * void - * example_tsd_cleanup(void *arg) - * { - * example_t *example = (example_t *)arg; - * - * example->x = 42; - * [...] - * if ([want the cleanup function to be called again]) - * example_tsd_set(example); - * } - * - * If example_tsd_set() is called within example_tsd_cleanup(), it will be - * called again. This is similar to how pthreads TSD destruction works, except - * that pthreads only calls the cleanup function again if the value was set to - * non-NULL. + * The last 3 members (i, a and o) before tcache isn't really needed on tcache + * fast path. However we have a number of unused tcache bins and witnesses + * (never touched unless config_debug) at the end of tcache, so we place them + * there to avoid breaking the cachelines and possibly paging in an extra page. */ - -/* malloc_tsd_types(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_types(a_name, a_type) -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_types(a_name, a_type) -#elif (defined(_WIN32)) -#define malloc_tsd_types(a_name, a_type) \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##tsd_wrapper_t; -#else -#define malloc_tsd_types(a_name, a_type) \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##tsd_wrapper_t; -#endif - -/* malloc_tsd_protos(). */ -#define malloc_tsd_protos(a_attr, a_name, a_type) \ -a_attr bool \ -a_name##tsd_boot0(void); \ -a_attr void \ -a_name##tsd_boot1(void); \ -a_attr bool \ -a_name##tsd_boot(void); \ -a_attr bool \ -a_name##tsd_booted_get(void); \ -a_attr a_type * \ -a_name##tsd_get(bool init); \ -a_attr void \ -a_name##tsd_set(a_type *val); - -/* malloc_tsd_externs(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##tsd_tls; \ -extern __thread bool a_name##tsd_initialized; \ -extern bool a_name##tsd_booted; -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##tsd_tls; \ -extern pthread_key_t a_name##tsd_tsd; \ -extern bool a_name##tsd_booted; -#elif (defined(_WIN32)) -#define malloc_tsd_externs(a_name, a_type) \ -extern DWORD a_name##tsd_tsd; \ -extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ -extern bool a_name##tsd_booted; -#else -#define malloc_tsd_externs(a_name, a_type) \ -extern pthread_key_t a_name##tsd_tsd; \ -extern tsd_init_head_t a_name##tsd_init_head; \ -extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ -extern bool a_name##tsd_booted; -#endif - -/* malloc_tsd_data(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##tsd_tls = a_initializer; \ -a_attr __thread bool JEMALLOC_TLS_MODEL \ - a_name##tsd_initialized = false; \ -a_attr bool a_name##tsd_booted = false; -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##tsd_tls = a_initializer; \ -a_attr pthread_key_t a_name##tsd_tsd; \ -a_attr bool a_name##tsd_booted = false; -#elif (defined(_WIN32)) -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr DWORD a_name##tsd_tsd; \ -a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ - false, \ - a_initializer \ -}; \ -a_attr bool a_name##tsd_booted = false; +#ifdef JEMALLOC_JET +typedef void (*test_callback_t)(int *); +# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 +# define MALLOC_TEST_TSD \ + O(test_data, int, int) \ + O(test_callback, test_callback_t, int) +# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL #else -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr pthread_key_t a_name##tsd_tsd; \ -a_attr tsd_init_head_t a_name##tsd_init_head = { \ - ql_head_initializer(blocks), \ - MALLOC_MUTEX_INITIALIZER \ -}; \ -a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ - false, \ - a_initializer \ -}; \ -a_attr bool a_name##tsd_booted = false; +# define MALLOC_TEST_TSD +# define MALLOC_TEST_TSD_INITIALIZER #endif -/* malloc_tsd_funcs(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##tsd_cleanup_wrapper(void) \ -{ \ - \ - if (a_name##tsd_initialized) { \ - a_name##tsd_initialized = false; \ - a_cleanup(&a_name##tsd_tls); \ - } \ - return (a_name##tsd_initialized); \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ - \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##tsd_cleanup_wrapper); \ - } \ - a_name##tsd_booted = true; \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_boot1(void) \ -{ \ - \ - /* Do nothing. */ \ -} \ -a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - \ - return (a_name##tsd_boot0()); \ -} \ -a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ - \ - return (a_name##tsd_booted); \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ - \ - return (false); \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(bool init) \ -{ \ - \ - assert(a_name##tsd_booted); \ - return (&a_name##tsd_tls); \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ - \ - assert(a_name##tsd_booted); \ - a_name##tsd_tls = (*val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ - a_name##tsd_initialized = true; \ -} -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ - \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \ - 0) \ - return (true); \ - } \ - a_name##tsd_booted = true; \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_boot1(void) \ -{ \ - \ - /* Do nothing. */ \ -} \ -a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - \ - return (a_name##tsd_boot0()); \ -} \ -a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ - \ - return (a_name##tsd_booted); \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ - \ - return (false); \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(bool init) \ -{ \ - \ - assert(a_name##tsd_booted); \ - return (&a_name##tsd_tls); \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ - \ - assert(a_name##tsd_booted); \ - a_name##tsd_tls = (*val); \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)(&a_name##tsd_tls))) { \ - malloc_write("<jemalloc>: Error" \ - " setting TSD for "#a_name"\n"); \ - if (opt_abort) \ - abort(); \ - } \ - } \ -} -#elif (defined(_WIN32)) -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##tsd_cleanup_wrapper(void) \ -{ \ - DWORD error = GetLastError(); \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - TlsGetValue(a_name##tsd_tsd); \ - SetLastError(error); \ - \ - if (wrapper == NULL) \ - return (false); \ - if (a_cleanup != malloc_tsd_no_cleanup && \ - wrapper->initialized) { \ - wrapper->initialized = false; \ - a_cleanup(&wrapper->val); \ - if (wrapper->initialized) { \ - /* Trigger another cleanup round. */ \ - return (true); \ - } \ - } \ - malloc_tsd_dalloc(wrapper); \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ -{ \ - \ - if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ - malloc_write("<jemalloc>: Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ -} \ -a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(bool init) \ -{ \ - DWORD error = GetLastError(); \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - TlsGetValue(a_name##tsd_tsd); \ - SetLastError(error); \ - \ - if (init && unlikely(wrapper == NULL)) { \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write("<jemalloc>: Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } else { \ - wrapper->initialized = false; \ - wrapper->val = a_initializer; \ - } \ - a_name##tsd_wrapper_set(wrapper); \ - } \ - return (wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ - \ - a_name##tsd_tsd = TlsAlloc(); \ - if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \ - return (true); \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##tsd_cleanup_wrapper); \ - } \ - a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ - a_name##tsd_booted = true; \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_boot1(void) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write("<jemalloc>: Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ - memcpy(wrapper, &a_name##tsd_boot_wrapper, \ - sizeof(a_name##tsd_wrapper_t)); \ - a_name##tsd_wrapper_set(wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - \ - if (a_name##tsd_boot0()) \ - return (true); \ - a_name##tsd_boot1(); \ - return (false); \ -} \ -a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ - \ - return (a_name##tsd_booted); \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ - \ - return (true); \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(bool init) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(init); \ - if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ - return (NULL); \ - return (&wrapper->val); \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(true); \ - wrapper->val = *(val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ - wrapper->initialized = true; \ -} -#else -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr void \ -a_name##tsd_cleanup_wrapper(void *arg) \ -{ \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)arg; \ - \ - if (a_cleanup != malloc_tsd_no_cleanup && \ - wrapper->initialized) { \ - wrapper->initialized = false; \ - a_cleanup(&wrapper->val); \ - if (wrapper->initialized) { \ - /* Trigger another cleanup round. */ \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)wrapper)) { \ - malloc_write("<jemalloc>: Error" \ - " setting TSD for "#a_name"\n"); \ - if (opt_abort) \ - abort(); \ - } \ - return; \ - } \ - } \ - malloc_tsd_dalloc(wrapper); \ -} \ -a_attr void \ -a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ -{ \ - \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)wrapper)) { \ - malloc_write("<jemalloc>: Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ -} \ -a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(bool init) \ -{ \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - pthread_getspecific(a_name##tsd_tsd); \ - \ - if (init && unlikely(wrapper == NULL)) { \ - tsd_init_block_t block; \ - wrapper = (a_name##tsd_wrapper_t *) \ - tsd_init_check_recursion(&a_name##tsd_init_head, \ - &block); \ - if (wrapper) \ - return (wrapper); \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - block.data = (void *)wrapper; \ - if (wrapper == NULL) { \ - malloc_write("<jemalloc>: Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } else { \ - wrapper->initialized = false; \ - wrapper->val = a_initializer; \ - } \ - a_name##tsd_wrapper_set(wrapper); \ - tsd_init_finish(&a_name##tsd_init_head, &block); \ - } \ - return (wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ - \ - if (pthread_key_create(&a_name##tsd_tsd, \ - a_name##tsd_cleanup_wrapper) != 0) \ - return (true); \ - a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ - a_name##tsd_booted = true; \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_boot1(void) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write("<jemalloc>: Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ - memcpy(wrapper, &a_name##tsd_boot_wrapper, \ - sizeof(a_name##tsd_wrapper_t)); \ - a_name##tsd_wrapper_set(wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - \ - if (a_name##tsd_boot0()) \ - return (true); \ - a_name##tsd_boot1(); \ - return (false); \ -} \ -a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ - \ - return (a_name##tsd_booted); \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ - \ - return (true); \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(bool init) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(init); \ - if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ - return (NULL); \ - return (&wrapper->val); \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(true); \ - wrapper->val = *(val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ - wrapper->initialized = true; \ -} -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -struct tsd_init_block_s { - ql_elm(tsd_init_block_t) link; - pthread_t thread; - void *data; -}; -struct tsd_init_head_s { - ql_head(tsd_init_block_t) blocks; - malloc_mutex_t lock; -}; -#endif - -#define MALLOC_TSD \ -/* O(name, type) */ \ - O(tcache, tcache_t *) \ - O(thread_allocated, uint64_t) \ - O(thread_deallocated, uint64_t) \ - O(prof_tdata, prof_tdata_t *) \ - O(iarena, arena_t *) \ - O(arena, arena_t *) \ - O(arenas_tdata, arena_tdata_t *) \ - O(narenas_tdata, unsigned) \ - O(arenas_tdata_bypass, bool) \ - O(tcache_enabled, tcache_enabled_t) \ - O(quarantine, quarantine_t *) \ - O(witnesses, witness_list_t) \ - O(witness_fork, bool) \ - -#define TSD_INITIALIZER { \ +/* O(name, type, nullable type */ +#define MALLOC_TSD \ + O(tcache_enabled, bool, bool) \ + O(arenas_tdata_bypass, bool, bool) \ + O(reentrancy_level, int8_t, int8_t) \ + O(narenas_tdata, uint32_t, uint32_t) \ + O(thread_allocated, uint64_t, uint64_t) \ + O(thread_deallocated, uint64_t, uint64_t) \ + O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ + O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ + O(iarena, arena_t *, arena_t *) \ + O(arena, arena_t *, arena_t *) \ + O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ + O(tcache, tcache_t, tcache_t) \ + O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ + MALLOC_TEST_TSD + +#define TSD_INITIALIZER { \ tsd_state_uninitialized, \ - NULL, \ + TCACHE_ENABLED_ZERO_INITIALIZER, \ + false, \ + 0, \ + 0, \ 0, \ 0, \ NULL, \ + RTREE_CTX_ZERO_INITIALIZER, \ NULL, \ NULL, \ NULL, \ - 0, \ - false, \ - tcache_enabled_default, \ - NULL, \ - ql_head_initializer(witnesses), \ - false \ + TCACHE_ZERO_INITIALIZER, \ + WITNESS_TSD_INITIALIZER \ + MALLOC_TEST_TSD_INITIALIZER \ } +enum { + tsd_state_nominal = 0, /* Common case --> jnz. */ + tsd_state_nominal_slow = 1, /* Initialized but on slow path. */ + /* the above 2 nominal states should be lower values. */ + tsd_state_nominal_max = 1, /* used for comparison only. */ + tsd_state_purgatory = 2, + tsd_state_reincarnated = 3, + tsd_state_uninitialized = 4 +}; + +/* Manually limit tsd_state_t to a single byte. */ +typedef uint8_t tsd_state_t; + +/* The actual tsd. */ struct tsd_s { + /* + * The contents should be treated as totally opaque outside the tsd + * module. Access any thread-local state through the getters and + * setters below. + */ tsd_state_t state; -#define O(n, t) \ - t n; +#define O(n, t, nt) \ + t use_a_getter_or_setter_instead_##n; MALLOC_TSD #undef O }; @@ -636,153 +127,184 @@ MALLOC_TSD * explicitly converted to tsd_t, which is non-nullable. */ struct tsdn_s { - tsd_t tsd; + tsd_t tsd; }; +#define TSDN_NULL ((tsdn_t *)0) +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsd_tsdn(tsd_t *tsd) { + return (tsdn_t *)tsd; +} -static const tsd_t tsd_initializer = TSD_INITIALIZER; - -malloc_tsd_types(, tsd_t) - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *malloc_tsd_malloc(size_t size); -void malloc_tsd_dalloc(void *wrapper); -void malloc_tsd_no_cleanup(void *arg); -void malloc_tsd_cleanup_register(bool (*f)(void)); -tsd_t *malloc_tsd_boot0(void); -void malloc_tsd_boot1(void); -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -void *tsd_init_check_recursion(tsd_init_head_t *head, - tsd_init_block_t *block); -void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); -#endif -void tsd_cleanup(void *arg); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) - -tsd_t *tsd_fetch_impl(bool init); -tsd_t *tsd_fetch(void); -tsdn_t *tsd_tsdn(tsd_t *tsd); -bool tsd_nominal(tsd_t *tsd); -#define O(n, t) \ -t *tsd_##n##p_get(tsd_t *tsd); \ -t tsd_##n##_get(tsd_t *tsd); \ -void tsd_##n##_set(tsd_t *tsd, t n); -MALLOC_TSD -#undef O -tsdn_t *tsdn_fetch(void); -bool tsdn_null(const tsdn_t *tsdn); -tsd_t *tsdn_tsd(tsdn_t *tsdn); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) -malloc_tsd_externs(, tsd_t) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch_impl(bool init) -{ - tsd_t *tsd = tsd_get(init); - - if (!init && tsd_get_allocates() && tsd == NULL) - return (NULL); - assert(tsd != NULL); - - if (unlikely(tsd->state != tsd_state_nominal)) { - if (tsd->state == tsd_state_uninitialized) { - tsd->state = tsd_state_nominal; - /* Trigger cleanup handler registration. */ - tsd_set(tsd); - } else if (tsd->state == tsd_state_purgatory) { - tsd->state = tsd_state_reincarnated; - tsd_set(tsd); - } else - assert(tsd->state == tsd_state_reincarnated); - } - - return (tsd); +JEMALLOC_ALWAYS_INLINE bool +tsdn_null(const tsdn_t *tsdn) { + return tsdn == NULL; } JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch(void) -{ +tsdn_tsd(tsdn_t *tsdn) { + assert(!tsdn_null(tsdn)); - return (tsd_fetch_impl(true)); + return &tsdn->tsd; } -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsd_tsdn(tsd_t *tsd) -{ +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +void malloc_tsd_cleanup_register(bool (*f)(void)); +tsd_t *malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); +void tsd_cleanup(void *arg); +tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal); +void tsd_slow_update(tsd_t *tsd); - return ((tsdn_t *)tsd); +/* + * We put the platform-specific data declarations and inlines into their own + * header files to avoid cluttering this file. They define tsd_boot0, + * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and tsd_set. + */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#include "jemalloc/internal/tsd_malloc_thread_cleanup.h" +#elif (defined(JEMALLOC_TLS)) +#include "jemalloc/internal/tsd_tls.h" +#elif (defined(_WIN32)) +#include "jemalloc/internal/tsd_win.h" +#else +#include "jemalloc/internal/tsd_generic.h" +#endif + +/* + * tsd_foop_get_unsafe(tsd) returns a pointer to the thread-local instance of + * foo. This omits some safety checks, and so can be used during tsd + * initialization and cleanup. + */ +#define O(n, t, nt) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get_unsafe(tsd_t *tsd) { \ + return &tsd->use_a_getter_or_setter_instead_##n; \ } +MALLOC_TSD +#undef O -JEMALLOC_INLINE bool -tsd_nominal(tsd_t *tsd) -{ +/* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. */ +#define O(n, t, nt) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get(tsd_t *tsd) { \ + assert(tsd->state == tsd_state_nominal || \ + tsd->state == tsd_state_nominal_slow || \ + tsd->state == tsd_state_reincarnated); \ + return tsd_##n##p_get_unsafe(tsd); \ +} +MALLOC_TSD +#undef O - return (tsd->state == tsd_state_nominal); +/* + * tsdn_foop_get(tsdn) returns either the thread-local instance of foo (if tsdn + * isn't NULL), or NULL (if tsdn is NULL), cast to the nullable pointer type. + */ +#define O(n, t, nt) \ +JEMALLOC_ALWAYS_INLINE nt * \ +tsdn_##n##p_get(tsdn_t *tsdn) { \ + if (tsdn_null(tsdn)) { \ + return NULL; \ + } \ + tsd_t *tsd = tsdn_tsd(tsdn); \ + return (nt *)tsd_##n##p_get(tsd); \ } +MALLOC_TSD +#undef O -#define O(n, t) \ -JEMALLOC_ALWAYS_INLINE t * \ -tsd_##n##p_get(tsd_t *tsd) \ -{ \ - \ - return (&tsd->n); \ -} \ - \ +/* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. */ +#define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE t \ -tsd_##n##_get(tsd_t *tsd) \ -{ \ - \ - return (*tsd_##n##p_get(tsd)); \ -} \ - \ +tsd_##n##_get(tsd_t *tsd) { \ + return *tsd_##n##p_get(tsd); \ +} +MALLOC_TSD +#undef O + +/* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. */ +#define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE void \ -tsd_##n##_set(tsd_t *tsd, t n) \ -{ \ - \ - assert(tsd->state == tsd_state_nominal); \ - tsd->n = n; \ +tsd_##n##_set(tsd_t *tsd, t val) { \ + assert(tsd->state != tsd_state_reincarnated); \ + *tsd_##n##p_get(tsd) = val; \ } MALLOC_TSD #undef O -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsdn_fetch(void) -{ +JEMALLOC_ALWAYS_INLINE void +tsd_assert_fast(tsd_t *tsd) { + assert(!malloc_slow && tsd_tcache_enabled_get(tsd) && + tsd_reentrancy_level_get(tsd) == 0); +} - if (!tsd_booted_get()) - return (NULL); +JEMALLOC_ALWAYS_INLINE bool +tsd_fast(tsd_t *tsd) { + bool fast = (tsd->state == tsd_state_nominal); + if (fast) { + tsd_assert_fast(tsd); + } - return (tsd_tsdn(tsd_fetch_impl(false))); + return fast; } -JEMALLOC_ALWAYS_INLINE bool -tsdn_null(const tsdn_t *tsdn) -{ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch_impl(bool init, bool internal) { + tsd_t *tsd = tsd_get(init); - return (tsdn == NULL); + if (!init && tsd_get_allocates() && tsd == NULL) { + return NULL; + } + assert(tsd != NULL); + + if (unlikely(tsd->state != tsd_state_nominal)) { + return tsd_fetch_slow(tsd, internal); + } + assert(tsd_fast(tsd)); + tsd_assert_fast(tsd); + + return tsd; } JEMALLOC_ALWAYS_INLINE tsd_t * -tsdn_tsd(tsdn_t *tsdn) -{ +tsd_internal_fetch(void) { + return tsd_fetch_impl(true, true); +} - assert(!tsdn_null(tsdn)); +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) { + return tsd_fetch_impl(true, false); +} - return (&tsdn->tsd); +static inline bool +tsd_nominal(tsd_t *tsd) { + return (tsd->state <= tsd_state_nominal_max); +} + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsdn_fetch(void) { + if (!tsd_booted_get()) { + return NULL; + } + + return tsd_tsdn(tsd_fetch_impl(false, false)); +} + +JEMALLOC_ALWAYS_INLINE rtree_ctx_t * +tsd_rtree_ctx(tsd_t *tsd) { + return tsd_rtree_ctxp_get(tsd); +} + +JEMALLOC_ALWAYS_INLINE rtree_ctx_t * +tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { + /* + * If tsd cannot be accessed, initialize the fallback rtree_ctx and + * return a pointer to it. + */ + if (unlikely(tsdn_null(tsdn))) { + rtree_ctx_data_init(fallback); + return fallback; + } + return tsd_rtree_ctx(tsdn_tsd(tsdn)); } -#endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_TSD_H */ diff --git a/include/jemalloc/internal/tsd_generic.h b/include/jemalloc/internal/tsd_generic.h new file mode 100644 index 0000000..1e52ef7 --- /dev/null +++ b/include/jemalloc/internal/tsd_generic.h @@ -0,0 +1,157 @@ +#ifdef JEMALLOC_INTERNAL_TSD_GENERIC_H +#error This file should be included only once, by tsd.h. +#endif +#define JEMALLOC_INTERNAL_TSD_GENERIC_H + +typedef struct tsd_init_block_s tsd_init_block_t; +struct tsd_init_block_s { + ql_elm(tsd_init_block_t) link; + pthread_t thread; + void *data; +}; + +/* Defined in tsd.c, to allow the mutex headers to have tsd dependencies. */ +typedef struct tsd_init_head_s tsd_init_head_t; + +typedef struct { + bool initialized; + tsd_t val; +} tsd_wrapper_t; + +void *tsd_init_check_recursion(tsd_init_head_t *head, + tsd_init_block_t *block); +void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); + +extern pthread_key_t tsd_tsd; +extern tsd_init_head_t tsd_init_head; +extern tsd_wrapper_t tsd_boot_wrapper; +extern bool tsd_booted; + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE void +tsd_cleanup_wrapper(void *arg) { + tsd_wrapper_t *wrapper = (tsd_wrapper_t *)arg; + + if (wrapper->initialized) { + wrapper->initialized = false; + tsd_cleanup(&wrapper->val); + if (wrapper->initialized) { + /* Trigger another cleanup round. */ + if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0) + { + malloc_write("<jemalloc>: Error setting TSD\n"); + if (opt_abort) { + abort(); + } + } + return; + } + } + malloc_tsd_dalloc(wrapper); +} + +JEMALLOC_ALWAYS_INLINE void +tsd_wrapper_set(tsd_wrapper_t *wrapper) { + if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0) { + malloc_write("<jemalloc>: Error setting TSD\n"); + abort(); + } +} + +JEMALLOC_ALWAYS_INLINE tsd_wrapper_t * +tsd_wrapper_get(bool init) { + tsd_wrapper_t *wrapper = (tsd_wrapper_t *)pthread_getspecific(tsd_tsd); + + if (init && unlikely(wrapper == NULL)) { + tsd_init_block_t block; + wrapper = (tsd_wrapper_t *) + tsd_init_check_recursion(&tsd_init_head, &block); + if (wrapper) { + return wrapper; + } + wrapper = (tsd_wrapper_t *) + malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + block.data = (void *)wrapper; + if (wrapper == NULL) { + malloc_write("<jemalloc>: Error allocating TSD\n"); + abort(); + } else { + wrapper->initialized = false; + tsd_t initializer = TSD_INITIALIZER; + wrapper->val = initializer; + } + tsd_wrapper_set(wrapper); + tsd_init_finish(&tsd_init_head, &block); + } + return wrapper; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + if (pthread_key_create(&tsd_tsd, tsd_cleanup_wrapper) != 0) { + return true; + } + tsd_wrapper_set(&tsd_boot_wrapper); + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + tsd_wrapper_t *wrapper; + wrapper = (tsd_wrapper_t *)malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + if (wrapper == NULL) { + malloc_write("<jemalloc>: Error allocating TSD\n"); + abort(); + } + tsd_boot_wrapper.initialized = false; + tsd_cleanup(&tsd_boot_wrapper.val); + wrapper->initialized = false; + tsd_t initializer = TSD_INITIALIZER; + wrapper->val = initializer; + tsd_wrapper_set(wrapper); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + if (tsd_boot0()) { + return true; + } + tsd_boot1(); + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return true; +} + +/* Get/set. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(init); + if (tsd_get_allocates() && !init && wrapper == NULL) { + return NULL; + } + return &wrapper->val; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(true); + if (likely(&wrapper->val != val)) { + wrapper->val = *(val); + } + wrapper->initialized = true; +} diff --git a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h new file mode 100644 index 0000000..beb467a --- /dev/null +++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -0,0 +1,60 @@ +#ifdef JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H +#error This file should be included only once, by tsd.h. +#endif +#define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H + +extern __thread tsd_t tsd_tls; +extern __thread bool tsd_initialized; +extern bool tsd_booted; + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE bool +tsd_cleanup_wrapper(void) { + if (tsd_initialized) { + tsd_initialized = false; + tsd_cleanup(&tsd_tls); + } + return tsd_initialized; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + /* Do nothing. */ +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + return tsd_boot0(); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return false; +} + +/* Get/set. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + assert(tsd_booted); + return &tsd_tls; +} +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + assert(tsd_booted); + if (likely(&tsd_tls != val)) { + tsd_tls = (*val); + } + tsd_initialized = true; +} diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h new file mode 100644 index 0000000..757aaa0 --- /dev/null +++ b/include/jemalloc/internal/tsd_tls.h @@ -0,0 +1,59 @@ +#ifdef JEMALLOC_INTERNAL_TSD_TLS_H +#error This file should be included only once, by tsd.h. +#endif +#define JEMALLOC_INTERNAL_TSD_TLS_H + +extern __thread tsd_t tsd_tls; +extern pthread_key_t tsd_tsd; +extern bool tsd_booted; + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + if (pthread_key_create(&tsd_tsd, &tsd_cleanup) != 0) { + return true; + } + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + /* Do nothing. */ +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + return tsd_boot0(); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return false; +} + +/* Get/set. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + assert(tsd_booted); + return &tsd_tls; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + assert(tsd_booted); + if (likely(&tsd_tls != val)) { + tsd_tls = (*val); + } + if (pthread_setspecific(tsd_tsd, (void *)(&tsd_tls)) != 0) { + malloc_write("<jemalloc>: Error setting tsd.\n"); + if (opt_abort) { + abort(); + } + } +} diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h new file mode 100644 index 0000000..6200af6 --- /dev/null +++ b/include/jemalloc/internal/tsd_types.h @@ -0,0 +1,10 @@ +#ifndef JEMALLOC_INTERNAL_TSD_TYPES_H +#define JEMALLOC_INTERNAL_TSD_TYPES_H + +#define MALLOC_TSD_CLEANUPS_MAX 2 + +typedef struct tsd_s tsd_t; +typedef struct tsdn_s tsdn_t; +typedef bool (*malloc_tsd_cleanup_t)(void); + +#endif /* JEMALLOC_INTERNAL_TSD_TYPES_H */ diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h new file mode 100644 index 0000000..cf30d18 --- /dev/null +++ b/include/jemalloc/internal/tsd_win.h @@ -0,0 +1,139 @@ +#ifdef JEMALLOC_INTERNAL_TSD_WIN_H +#error This file should be included only once, by tsd.h. +#endif +#define JEMALLOC_INTERNAL_TSD_WIN_H + +typedef struct { + bool initialized; + tsd_t val; +} tsd_wrapper_t; + +extern DWORD tsd_tsd; +extern tsd_wrapper_t tsd_boot_wrapper; +extern bool tsd_booted; + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE bool +tsd_cleanup_wrapper(void) { + DWORD error = GetLastError(); + tsd_wrapper_t *wrapper = (tsd_wrapper_t *)TlsGetValue(tsd_tsd); + SetLastError(error); + + if (wrapper == NULL) { + return false; + } + + if (wrapper->initialized) { + wrapper->initialized = false; + tsd_cleanup(&wrapper->val); + if (wrapper->initialized) { + /* Trigger another cleanup round. */ + return true; + } + } + malloc_tsd_dalloc(wrapper); + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_wrapper_set(tsd_wrapper_t *wrapper) { + if (!TlsSetValue(tsd_tsd, (void *)wrapper)) { + malloc_write("<jemalloc>: Error setting TSD\n"); + abort(); + } +} + +JEMALLOC_ALWAYS_INLINE tsd_wrapper_t * +tsd_wrapper_get(bool init) { + DWORD error = GetLastError(); + tsd_wrapper_t *wrapper = (tsd_wrapper_t *) TlsGetValue(tsd_tsd); + SetLastError(error); + + if (init && unlikely(wrapper == NULL)) { + wrapper = (tsd_wrapper_t *) + malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + if (wrapper == NULL) { + malloc_write("<jemalloc>: Error allocating TSD\n"); + abort(); + } else { + wrapper->initialized = false; + /* MSVC is finicky about aggregate initialization. */ + tsd_t tsd_initializer = TSD_INITIALIZER; + wrapper->val = tsd_initializer; + } + tsd_wrapper_set(wrapper); + } + return wrapper; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + tsd_tsd = TlsAlloc(); + if (tsd_tsd == TLS_OUT_OF_INDEXES) { + return true; + } + malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); + tsd_wrapper_set(&tsd_boot_wrapper); + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + tsd_wrapper_t *wrapper; + wrapper = (tsd_wrapper_t *) + malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + if (wrapper == NULL) { + malloc_write("<jemalloc>: Error allocating TSD\n"); + abort(); + } + tsd_boot_wrapper.initialized = false; + tsd_cleanup(&tsd_boot_wrapper.val); + wrapper->initialized = false; + tsd_t initializer = TSD_INITIALIZER; + wrapper->val = initializer; + tsd_wrapper_set(wrapper); +} +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + if (tsd_boot0()) { + return true; + } + tsd_boot1(); + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return true; +} + +/* Get/set. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(init); + if (tsd_get_allocates() && !init && wrapper == NULL) { + return NULL; + } + return &wrapper->val; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(true); + if (likely(&wrapper->val != val)) { + wrapper->val = *(val); + } + wrapper->initialized = true; +} diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 4b56d65..304cb54 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -1,44 +1,7 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_UTIL_H +#define JEMALLOC_INTERNAL_UTIL_H -#ifdef _WIN32 -# ifdef _WIN64 -# define FMT64_PREFIX "ll" -# define FMTPTR_PREFIX "ll" -# else -# define FMT64_PREFIX "ll" -# define FMTPTR_PREFIX "" -# endif -# define FMTd32 "d" -# define FMTu32 "u" -# define FMTx32 "x" -# define FMTd64 FMT64_PREFIX "d" -# define FMTu64 FMT64_PREFIX "u" -# define FMTx64 FMT64_PREFIX "x" -# define FMTdPTR FMTPTR_PREFIX "d" -# define FMTuPTR FMTPTR_PREFIX "u" -# define FMTxPTR FMTPTR_PREFIX "x" -#else -# include <inttypes.h> -# define FMTd32 PRId32 -# define FMTu32 PRIu32 -# define FMTx32 PRIx32 -# define FMTd64 PRId64 -# define FMTu64 PRIu64 -# define FMTx64 PRIx64 -# define FMTdPTR PRIdPTR -# define FMTuPTR PRIuPTR -# define FMTxPTR PRIxPTR -#endif - -/* Size of stack-allocated buffer passed to buferror(). */ -#define BUFERROR_BUF 64 - -/* - * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be - * large enough for all possible uses within jemalloc. - */ -#define MALLOC_PRINTF_BUFSIZE 4096 +#define UTIL_INLINE static inline /* Junk fill patterns. */ #ifndef JEMALLOC_ALLOC_JUNK @@ -52,25 +15,25 @@ * Wrap a cpp argument that contains commas such that it isn't broken up into * multiple arguments. */ -#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ +#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ + +/* cpp macro definition stringification. */ +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) /* * Silence compiler warnings due to uninitialized values. This is used * wherever the compiler fails to recognize that the variable is never used * uninitialized. */ -#ifdef JEMALLOC_CC_SILENCE -# define JEMALLOC_CC_SILENCE_INIT(v) = v -#else -# define JEMALLOC_CC_SILENCE_INIT(v) -#endif +#define JEMALLOC_CC_SILENCE_INIT(v) = v #ifdef __GNUC__ -# define likely(x) __builtin_expect(!!(x), 1) -# define unlikely(x) __builtin_expect(!!(x), 0) +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) #else -# define likely(x) !!(x) -# define unlikely(x) !!(x) +# define likely(x) !!(x) +# define unlikely(x) !!(x) #endif #if !defined(JEMALLOC_INTERNAL_UNREACHABLE) @@ -79,245 +42,9 @@ #define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() -#include "jemalloc/internal/assert.h" - -/* Use to assert a particular configuration, e.g., cassert(config_debug). */ -#define cassert(c) do { \ - if (unlikely(!(c))) \ - not_reached(); \ -} while (0) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -int buferror(int err, char *buf, size_t buflen); -uintmax_t malloc_strtoumax(const char *restrict nptr, - char **restrict endptr, int base); -void malloc_write(const char *s); - -/* - * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating - * point math. - */ -size_t malloc_vsnprintf(char *str, size_t size, const char *format, - va_list ap); -size_t malloc_snprintf(char *str, size_t size, const char *format, ...) - JEMALLOC_FORMAT_PRINTF(3, 4); -void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap); -void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, - const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); -void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -unsigned ffs_llu(unsigned long long bitmap); -unsigned ffs_lu(unsigned long bitmap); -unsigned ffs_u(unsigned bitmap); -unsigned ffs_zu(size_t bitmap); -unsigned ffs_u64(uint64_t bitmap); -unsigned ffs_u32(uint32_t bitmap); -uint64_t pow2_ceil_u64(uint64_t x); -uint32_t pow2_ceil_u32(uint32_t x); -size_t pow2_ceil_zu(size_t x); -unsigned lg_floor(size_t x); -void set_errno(int errnum); -int get_errno(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) - -/* Sanity check. */ -#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ - || !defined(JEMALLOC_INTERNAL_FFS) -# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure -#endif - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_llu(unsigned long long bitmap) -{ - - return (JEMALLOC_INTERNAL_FFSLL(bitmap)); -} - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_lu(unsigned long bitmap) -{ - - return (JEMALLOC_INTERNAL_FFSL(bitmap)); -} - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_u(unsigned bitmap) -{ - - return (JEMALLOC_INTERNAL_FFS(bitmap)); -} - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_zu(size_t bitmap) -{ - -#if LG_SIZEOF_PTR == LG_SIZEOF_INT - return (ffs_u(bitmap)); -#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG - return (ffs_lu(bitmap)); -#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG - return (ffs_llu(bitmap)); -#else -#error No implementation for size_t ffs() -#endif -} - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_u64(uint64_t bitmap) -{ - -#if LG_SIZEOF_LONG == 3 - return (ffs_lu(bitmap)); -#elif LG_SIZEOF_LONG_LONG == 3 - return (ffs_llu(bitmap)); -#else -#error No implementation for 64-bit ffs() -#endif -} - -JEMALLOC_ALWAYS_INLINE unsigned -ffs_u32(uint32_t bitmap) -{ - -#if LG_SIZEOF_INT == 2 - return (ffs_u(bitmap)); -#else -#error No implementation for 32-bit ffs() -#endif - return (ffs_u(bitmap)); -} - -JEMALLOC_INLINE uint64_t -pow2_ceil_u64(uint64_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x |= x >> 32; - x++; - return (x); -} - -JEMALLOC_INLINE uint32_t -pow2_ceil_u32(uint32_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x++; - return (x); -} - -/* Compute the smallest power of 2 that is >= x. */ -JEMALLOC_INLINE size_t -pow2_ceil_zu(size_t x) -{ - -#if (LG_SIZEOF_PTR == 3) - return (pow2_ceil_u64(x)); -#else - return (pow2_ceil_u32(x)); -#endif -} - -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE unsigned -lg_floor(size_t x) -{ - size_t ret; - - assert(x != 0); - - asm ("bsr %1, %0" - : "=r"(ret) // Outputs. - : "r"(x) // Inputs. - ); - assert(ret < UINT_MAX); - return ((unsigned)ret); -} -#elif (defined(_MSC_VER)) -JEMALLOC_INLINE unsigned -lg_floor(size_t x) -{ - unsigned long ret; - - assert(x != 0); - -#if (LG_SIZEOF_PTR == 3) - _BitScanReverse64(&ret, x); -#elif (LG_SIZEOF_PTR == 2) - _BitScanReverse(&ret, x); -#else -# error "Unsupported type size for lg_floor()" -#endif - assert(ret < UINT_MAX); - return ((unsigned)ret); -} -#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) -JEMALLOC_INLINE unsigned -lg_floor(size_t x) -{ - - assert(x != 0); - -#if (LG_SIZEOF_PTR == LG_SIZEOF_INT) - return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x)); -#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) - return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x)); -#else -# error "Unsupported type size for lg_floor()" -#endif -} -#else -JEMALLOC_INLINE unsigned -lg_floor(size_t x) -{ - - assert(x != 0); - - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); -#if (LG_SIZEOF_PTR == 3) - x |= (x >> 32); -#endif - if (x == SIZE_T_MAX) - return ((8 << LG_SIZEOF_PTR) - 1); - x++; - return (ffs_zu(x) - 2); -} -#endif - /* Set error code. */ -JEMALLOC_INLINE void -set_errno(int errnum) -{ - +UTIL_INLINE void +set_errno(int errnum) { #ifdef _WIN32 SetLastError(errnum); #else @@ -326,17 +53,15 @@ set_errno(int errnum) } /* Get last error code. */ -JEMALLOC_INLINE int -get_errno(void) -{ - +UTIL_INLINE int +get_errno(void) { #ifdef _WIN32 - return (GetLastError()); + return GetLastError(); #else - return (errno); + return errno; #endif } -#endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#undef UTIL_INLINE + +#endif /* JEMALLOC_INTERNAL_UTIL_H */ diff --git a/include/jemalloc/internal/valgrind.h b/include/jemalloc/internal/valgrind.h deleted file mode 100644 index 877a142..0000000 --- a/include/jemalloc/internal/valgrind.h +++ /dev/null @@ -1,128 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#ifdef JEMALLOC_VALGRIND -#include <valgrind/valgrind.h> - -/* - * The size that is reported to Valgrind must be consistent through a chain of - * malloc..realloc..realloc calls. Request size isn't recorded anywhere in - * jemalloc, so it is critical that all callers of these macros provide usize - * rather than request size. As a result, buffer overflow detection is - * technically weakened for the standard API, though it is generally accepted - * practice to consider any extra bytes reported by malloc_usable_size() as - * usable space. - */ -#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do { \ - if (unlikely(in_valgrind)) \ - valgrind_make_mem_noaccess(ptr, usize); \ -} while (0) -#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do { \ - if (unlikely(in_valgrind)) \ - valgrind_make_mem_undefined(ptr, usize); \ -} while (0) -#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do { \ - if (unlikely(in_valgrind)) \ - valgrind_make_mem_defined(ptr, usize); \ -} while (0) -/* - * The VALGRIND_MALLOCLIKE_BLOCK() and VALGRIND_RESIZEINPLACE_BLOCK() macro - * calls must be embedded in macros rather than in functions so that when - * Valgrind reports errors, there are no extra stack frames in the backtraces. - */ -#define JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do { \ - if (unlikely(in_valgrind && cond)) { \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(tsdn, ptr), \ - zero); \ - } \ -} while (0) -#define JEMALLOC_VALGRIND_REALLOC_MOVED_no(ptr, old_ptr) \ - (false) -#define JEMALLOC_VALGRIND_REALLOC_MOVED_maybe(ptr, old_ptr) \ - ((ptr) != (old_ptr)) -#define JEMALLOC_VALGRIND_REALLOC_PTR_NULL_no(ptr) \ - (false) -#define JEMALLOC_VALGRIND_REALLOC_PTR_NULL_maybe(ptr) \ - (ptr == NULL) -#define JEMALLOC_VALGRIND_REALLOC_OLD_PTR_NULL_no(old_ptr) \ - (false) -#define JEMALLOC_VALGRIND_REALLOC_OLD_PTR_NULL_maybe(old_ptr) \ - (old_ptr == NULL) -#define JEMALLOC_VALGRIND_REALLOC(moved, tsdn, ptr, usize, ptr_null, \ - old_ptr, old_usize, old_rzsize, old_ptr_null, zero) do { \ - if (unlikely(in_valgrind)) { \ - size_t rzsize = p2rz(tsdn, ptr); \ - \ - if (!JEMALLOC_VALGRIND_REALLOC_MOVED_##moved(ptr, \ - old_ptr)) { \ - VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ - usize, rzsize); \ - if (zero && old_usize < usize) { \ - valgrind_make_mem_defined( \ - (void *)((uintptr_t)ptr + \ - old_usize), usize - old_usize); \ - } \ - } else { \ - if (!JEMALLOC_VALGRIND_REALLOC_OLD_PTR_NULL_## \ - old_ptr_null(old_ptr)) { \ - valgrind_freelike_block(old_ptr, \ - old_rzsize); \ - } \ - if (!JEMALLOC_VALGRIND_REALLOC_PTR_NULL_## \ - ptr_null(ptr)) { \ - size_t copy_size = (old_usize < usize) \ - ? old_usize : usize; \ - size_t tail_size = usize - copy_size; \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ - rzsize, false); \ - if (copy_size > 0) { \ - valgrind_make_mem_defined(ptr, \ - copy_size); \ - } \ - if (zero && tail_size > 0) { \ - valgrind_make_mem_defined( \ - (void *)((uintptr_t)ptr + \ - copy_size), tail_size); \ - } \ - } \ - } \ - } \ -} while (0) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ - if (unlikely(in_valgrind)) \ - valgrind_freelike_block(ptr, rzsize); \ -} while (0) -#else -#define RUNNING_ON_VALGRIND ((unsigned)0) -#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize, \ - ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ - zero) do {} while (0) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#ifdef JEMALLOC_VALGRIND -void valgrind_make_mem_noaccess(void *ptr, size_t usize); -void valgrind_make_mem_undefined(void *ptr, size_t usize); -void valgrind_make_mem_defined(void *ptr, size_t usize); -void valgrind_freelike_block(void *ptr, size_t usize); -#endif - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 30d8c7e..33be666 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -1,29 +1,35 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_WITNESS_H +#define JEMALLOC_INTERNAL_WITNESS_H -typedef struct witness_s witness_t; -typedef unsigned witness_rank_t; -typedef ql_head(witness_t) witness_list_t; -typedef int witness_comp_t (const witness_t *, const witness_t *); +#include "jemalloc/internal/ql.h" + +/******************************************************************************/ +/* LOCK RANKS */ +/******************************************************************************/ /* - * Lock ranks. Witnesses with rank WITNESS_RANK_OMIT are completely ignored by - * the witness machinery. + * Witnesses with rank WITNESS_RANK_OMIT are completely ignored by the witness + * machinery. */ -#define WITNESS_RANK_OMIT 0U + +#define WITNESS_RANK_OMIT 0U #define WITNESS_RANK_MIN 1U -#define WITNESS_RANK_INIT 1U -#define WITNESS_RANK_CTL 1U +#define WITNESS_RANK_INIT 1U +#define WITNESS_RANK_CTL 1U #define WITNESS_RANK_TCACHES 2U -#define WITNESS_RANK_ARENAS 3U +#define WITNESS_RANK_ARENAS 3U + +#define WITNESS_RANK_BACKGROUND_THREAD_GLOBAL 4U + +#define WITNESS_RANK_PROF_DUMP 5U +#define WITNESS_RANK_PROF_BT2GCTX 6U +#define WITNESS_RANK_PROF_TDATAS 7U +#define WITNESS_RANK_PROF_TDATA 8U +#define WITNESS_RANK_PROF_GCTX 9U -#define WITNESS_RANK_PROF_DUMP 4U -#define WITNESS_RANK_PROF_BT2GCTX 5U -#define WITNESS_RANK_PROF_TDATAS 6U -#define WITNESS_RANK_PROF_TDATA 7U -#define WITNESS_RANK_PROF_GCTX 8U +#define WITNESS_RANK_BACKGROUND_THREAD 10U /* * Used as an argument to witness_assert_depth_to_rank() in order to validate @@ -31,29 +37,44 @@ typedef int witness_comp_t (const witness_t *, const witness_t *); * witness_assert_depth_to_rank() is inclusive rather than exclusive, this * definition can have the same value as the minimally ranked core lock. */ -#define WITNESS_RANK_CORE 9U +#define WITNESS_RANK_CORE 11U + +#define WITNESS_RANK_DECAY 11U +#define WITNESS_RANK_TCACHE_QL 12U +#define WITNESS_RANK_EXTENT_GROW 13U +#define WITNESS_RANK_EXTENTS 14U +#define WITNESS_RANK_EXTENT_AVAIL 15U + +#define WITNESS_RANK_EXTENT_POOL 16U +#define WITNESS_RANK_RTREE 17U +#define WITNESS_RANK_BASE 18U +#define WITNESS_RANK_ARENA_LARGE 19U + +#define WITNESS_RANK_LEAF 0xffffffffU +#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF +#define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF +#define WITNESS_RANK_DSS WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF -#define WITNESS_RANK_ARENA 9U -#define WITNESS_RANK_ARENA_CHUNKS 10U -#define WITNESS_RANK_ARENA_NODE_CACHE 11U - -#define WITNESS_RANK_BASE 12U - -#define WITNESS_RANK_LEAF 0xffffffffU -#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF -#define WITNESS_RANK_ARENA_HUGE WITNESS_RANK_LEAF -#define WITNESS_RANK_DSS WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF - -#define WITNESS_INITIALIZER(rank) {"initializer", rank, NULL, {NULL, NULL}} - -#endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS +/* PER-WITNESS DATA */ +/******************************************************************************/ +#if defined(JEMALLOC_DEBUG) +# define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} +#else +# define WITNESS_INITIALIZER(name, rank) +#endif + +typedef struct witness_s witness_t; +typedef unsigned witness_rank_t; +typedef ql_head(witness_t) witness_list_t; +typedef int witness_comp_t (const witness_t *, void *, const witness_t *, + void *); struct witness_s { /* Name, used for printing lock order reversal messages. */ @@ -72,143 +93,159 @@ struct witness_s { */ witness_comp_t *comp; + /* Opaque data, passed to comp(). */ + void *opaque; + /* Linkage for thread's currently owned locks. */ ql_elm(witness_t) link; }; -#endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS +/* PER-THREAD DATA */ +/******************************************************************************/ +typedef struct witness_tsd_s witness_tsd_t; +struct witness_tsd_s { + witness_list_t witnesses; + bool forking; +}; + +#define WITNESS_TSD_INITIALIZER { ql_head_initializer(witnesses), false } +#define WITNESS_TSDN_NULL ((witness_tsdn_t *)0) + +/******************************************************************************/ +/* (PER-THREAD) NULLABILITY HELPERS */ +/******************************************************************************/ +typedef struct witness_tsdn_s witness_tsdn_t; +struct witness_tsdn_s { + witness_tsd_t witness_tsd; +}; + +JEMALLOC_ALWAYS_INLINE witness_tsdn_t * +witness_tsd_tsdn(witness_tsd_t *witness_tsd) { + return (witness_tsdn_t *)witness_tsd; +} + +JEMALLOC_ALWAYS_INLINE bool +witness_tsdn_null(witness_tsdn_t *witness_tsdn) { + return witness_tsdn == NULL; +} + +JEMALLOC_ALWAYS_INLINE witness_tsd_t * +witness_tsdn_tsd(witness_tsdn_t *witness_tsdn) { + assert(!witness_tsdn_null(witness_tsdn)); + return &witness_tsdn->witness_tsd; +} + +/******************************************************************************/ +/* API */ +/******************************************************************************/ +void witness_init(witness_t *witness, const char *name, witness_rank_t rank, + witness_comp_t *comp, void *opaque); -void witness_init(witness_t *witness, const char *name, witness_rank_t rank, - witness_comp_t *comp); -#ifdef JEMALLOC_JET typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); -extern witness_lock_error_t *witness_lock_error; -#else -void witness_lock_error(const witness_list_t *witnesses, - const witness_t *witness); -#endif -#ifdef JEMALLOC_JET +extern witness_lock_error_t *JET_MUTABLE witness_lock_error; + typedef void (witness_owner_error_t)(const witness_t *); -extern witness_owner_error_t *witness_owner_error; -#else -void witness_owner_error(const witness_t *witness); -#endif -#ifdef JEMALLOC_JET +extern witness_owner_error_t *JET_MUTABLE witness_owner_error; + typedef void (witness_not_owner_error_t)(const witness_t *); -extern witness_not_owner_error_t *witness_not_owner_error; -#else -void witness_not_owner_error(const witness_t *witness); -#endif -#ifdef JEMALLOC_JET +extern witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error; + typedef void (witness_depth_error_t)(const witness_list_t *, witness_rank_t rank_inclusive, unsigned depth); -extern witness_depth_error_t *witness_depth_error; -#else -void witness_depth_error(const witness_list_t *witnesses, - witness_rank_t rank_inclusive, unsigned depth); -#endif +extern witness_depth_error_t *JET_MUTABLE witness_depth_error; -void witnesses_cleanup(tsd_t *tsd); -void witness_fork_cleanup(tsd_t *tsd); -void witness_prefork(tsd_t *tsd); -void witness_postfork_parent(tsd_t *tsd); -void witness_postfork_child(tsd_t *tsd); +void witnesses_cleanup(witness_tsd_t *witness_tsd); +void witness_prefork(witness_tsd_t *witness_tsd); +void witness_postfork_parent(witness_tsd_t *witness_tsd); +void witness_postfork_child(witness_tsd_t *witness_tsd); -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool witness_owner(tsd_t *tsd, const witness_t *witness); -void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); -void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); -void witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive, - unsigned depth); -void witness_assert_depth(tsdn_t *tsdn, unsigned depth); -void witness_assert_lockless(tsdn_t *tsdn); -void witness_lock(tsdn_t *tsdn, witness_t *witness); -void witness_unlock(tsdn_t *tsdn, witness_t *witness); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) -JEMALLOC_INLINE bool -witness_owner(tsd_t *tsd, const witness_t *witness) -{ +/* Helper, not intended for direct use. */ +static inline bool +witness_owner(witness_tsd_t *witness_tsd, const witness_t *witness) { witness_list_t *witnesses; witness_t *w; cassert(config_debug); - witnesses = tsd_witnessesp_get(tsd); + witnesses = &witness_tsd->witnesses; ql_foreach(w, witnesses, link) { - if (w == witness) - return (true); + if (w == witness) { + return true; + } } - return (false); + return false; } -JEMALLOC_INLINE void -witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) -{ - tsd_t *tsd; +static inline void +witness_assert_owner(witness_tsdn_t *witness_tsdn, const witness_t *witness) { + witness_tsd_t *witness_tsd; - if (!config_debug) + if (!config_debug) { return; + } - if (tsdn_null(tsdn)) + if (witness_tsdn_null(witness_tsdn)) { return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); + if (witness->rank == WITNESS_RANK_OMIT) { return; + } - if (witness_owner(tsd, witness)) + if (witness_owner(witness_tsd, witness)) { return; + } witness_owner_error(witness); } -JEMALLOC_INLINE void -witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) -{ - tsd_t *tsd; +static inline void +witness_assert_not_owner(witness_tsdn_t *witness_tsdn, + const witness_t *witness) { + witness_tsd_t *witness_tsd; witness_list_t *witnesses; witness_t *w; - if (!config_debug) + if (!config_debug) { return; + } - if (tsdn_null(tsdn)) + if (witness_tsdn_null(witness_tsdn)) { return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); + if (witness->rank == WITNESS_RANK_OMIT) { return; + } - witnesses = tsd_witnessesp_get(tsd); + witnesses = &witness_tsd->witnesses; ql_foreach(w, witnesses, link) { - if (w == witness) + if (w == witness) { witness_not_owner_error(witness); + } } } -JEMALLOC_INLINE void -witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive, - unsigned depth) { - tsd_t *tsd; +static inline void +witness_assert_depth_to_rank(witness_tsdn_t *witness_tsdn, + witness_rank_t rank_inclusive, unsigned depth) { + witness_tsd_t *witness_tsd; unsigned d; witness_list_t *witnesses; witness_t *w; - if (!config_debug) + if (!config_debug) { return; + } - if (tsdn_null(tsdn)) + if (witness_tsdn_null(witness_tsdn)) { return; - tsd = tsdn_tsd(tsdn); + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); d = 0; - witnesses = tsd_witnessesp_get(tsd); + witnesses = &witness_tsd->witnesses; w = ql_last(witnesses, link); if (w != NULL) { ql_reverse_foreach(w, witnesses, link) { @@ -218,49 +255,53 @@ witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive, d++; } } - if (d != depth) + if (d != depth) { witness_depth_error(witnesses, rank_inclusive, depth); + } } -JEMALLOC_INLINE void -witness_assert_depth(tsdn_t *tsdn, unsigned depth) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_MIN, depth); +static inline void +witness_assert_depth(witness_tsdn_t *witness_tsdn, unsigned depth) { + witness_assert_depth_to_rank(witness_tsdn, WITNESS_RANK_MIN, depth); } -JEMALLOC_INLINE void -witness_assert_lockless(tsdn_t *tsdn) { - witness_assert_depth(tsdn, 0); +static inline void +witness_assert_lockless(witness_tsdn_t *witness_tsdn) { + witness_assert_depth(witness_tsdn, 0); } -JEMALLOC_INLINE void -witness_lock(tsdn_t *tsdn, witness_t *witness) -{ - tsd_t *tsd; +static inline void +witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) { + witness_tsd_t *witness_tsd; witness_list_t *witnesses; witness_t *w; - if (!config_debug) + if (!config_debug) { return; + } - if (tsdn_null(tsdn)) + if (witness_tsdn_null(witness_tsdn)) { return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); + if (witness->rank == WITNESS_RANK_OMIT) { return; + } - witness_assert_not_owner(tsdn, witness); + witness_assert_not_owner(witness_tsdn, witness); - witnesses = tsd_witnessesp_get(tsd); + witnesses = &witness_tsd->witnesses; w = ql_last(witnesses, link); if (w == NULL) { /* No other locks; do nothing. */ - } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { + } else if (witness_tsd->forking && w->rank <= witness->rank) { /* Forking, and relaxed ranking satisfied. */ } else if (w->rank > witness->rank) { /* Not forking, rank order reversal. */ witness_lock_error(witnesses, witness); } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != - witness->comp || w->comp(w, witness) > 0)) { + witness->comp || w->comp(w, w->opaque, witness, witness->opaque) > + 0)) { /* * Missing/incompatible comparison function, or comparison * function indicates rank order reversal. @@ -272,33 +313,34 @@ witness_lock(tsdn_t *tsdn, witness_t *witness) ql_tail_insert(witnesses, witness, link); } -JEMALLOC_INLINE void -witness_unlock(tsdn_t *tsdn, witness_t *witness) -{ - tsd_t *tsd; +static inline void +witness_unlock(witness_tsdn_t *witness_tsdn, witness_t *witness) { + witness_tsd_t *witness_tsd; witness_list_t *witnesses; - if (!config_debug) + if (!config_debug) { return; + } - if (tsdn_null(tsdn)) + if (witness_tsdn_null(witness_tsdn)) { return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); + if (witness->rank == WITNESS_RANK_OMIT) { return; + } /* * Check whether owner before removal, rather than relying on * witness_assert_owner() to abort, so that unit tests can test this * function's failure mode without causing undefined behavior. */ - if (witness_owner(tsd, witness)) { - witnesses = tsd_witnessesp_get(tsd); + if (witness_owner(witness_tsd, witness)) { + witnesses = &witness_tsd->witnesses; ql_remove(witnesses, witness, link); - } else - witness_assert_owner(tsdn, witness); + } else { + witness_assert_owner(witness_tsdn, witness); + } } -#endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_WITNESS_H */ diff --git a/include/jemalloc/jemalloc.sh b/include/jemalloc/jemalloc.sh index c085814..b19b154 100755 --- a/include/jemalloc/jemalloc.sh +++ b/include/jemalloc/jemalloc.sh @@ -4,7 +4,7 @@ objroot=$1 cat <<EOF #ifndef JEMALLOC_H_ -#define JEMALLOC_H_ +#define JEMALLOC_H_ #ifdef __cplusplus extern "C" { #endif @@ -15,7 +15,6 @@ for hdr in jemalloc_defs.h jemalloc_rename.h jemalloc_macros.h \ jemalloc_protos.h jemalloc_typedefs.h jemalloc_mangle.h ; do cat "${objroot}include/jemalloc/${hdr}" \ | grep -v 'Generated from .* by configure\.' \ - | sed -e 's/^#define /#define /g' \ | sed -e 's/ $//g' echo done diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 129240e..aee5543 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -4,32 +4,51 @@ #include <limits.h> #include <strings.h> -#define JEMALLOC_VERSION "@jemalloc_version@" -#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ -#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ -#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ -#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ -#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" +#define JEMALLOC_VERSION "@jemalloc_version@" +#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ +#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ +#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ +#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ +#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" -# define MALLOCX_LG_ALIGN(la) ((int)(la)) -# if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) -# else -# define MALLOCX_ALIGN(a) \ - ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ - ffs((int)(((size_t)(a))>>32))+31)) -# endif -# define MALLOCX_ZERO ((int)0x40) +#define MALLOCX_LG_ALIGN(la) ((int)(la)) +#if LG_SIZEOF_PTR == 2 +# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) +#else +# define MALLOCX_ALIGN(a) \ + ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ + ffs((int)(((size_t)(a))>>32))+31)) +#endif +#define MALLOCX_ZERO ((int)0x40) /* * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 * encodes MALLOCX_TCACHE_NONE. */ -# define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) -# define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) +#define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) +#define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) /* * Bias arena index bits so that 0 encodes "use an automatically chosen arena". */ -# define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) +#define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) + +/* + * Use as arena index in "arena.<i>.{purge,decay,dss}" and + * "stats.arenas.<i>.*" mallctl interfaces to select all arenas. This + * definition is intentionally specified in raw decimal format to support + * cpp-based string concatenation, e.g. + * + * #define STRINGIFY_HELPER(x) #x + * #define STRINGIFY(x) STRINGIFY_HELPER(x) + * + * mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, + * 0); + */ +#define MALLCTL_ARENAS_ALL 4096 +/* + * Use as arena index in "stats.arenas.<i>.*" mallctl interfaces to select + * destroyed arenas. + */ +#define MALLCTL_ARENAS_DESTROYED 4097 #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) # define JEMALLOC_CXX_THROW throw() @@ -37,7 +56,7 @@ # define JEMALLOC_CXX_THROW #endif -#if _MSC_VER +#if defined(_MSC_VER) # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) __declspec(align(s)) # define JEMALLOC_ALLOC_SIZE(s) diff --git a/include/jemalloc/jemalloc_typedefs.h.in b/include/jemalloc/jemalloc_typedefs.h.in index fa7b350..1a58874 100644 --- a/include/jemalloc/jemalloc_typedefs.h.in +++ b/include/jemalloc/jemalloc_typedefs.h.in @@ -1,57 +1,77 @@ +typedef struct extent_hooks_s extent_hooks_t; + /* * void * - * chunk_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, - * bool *commit, unsigned arena_ind); + * extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, + * size_t alignment, bool *zero, bool *commit, unsigned arena_ind); */ -typedef void *(chunk_alloc_t)(void *, size_t, size_t, bool *, bool *, unsigned); +typedef void *(extent_alloc_t)(extent_hooks_t *, void *, size_t, size_t, bool *, + bool *, unsigned); /* * bool - * chunk_dalloc(void *chunk, size_t size, bool committed, unsigned arena_ind); + * extent_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, + * bool committed, unsigned arena_ind); */ -typedef bool (chunk_dalloc_t)(void *, size_t, bool, unsigned); +typedef bool (extent_dalloc_t)(extent_hooks_t *, void *, size_t, bool, + unsigned); + +/* + * void + * extent_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, + * bool committed, unsigned arena_ind); + */ +typedef void (extent_destroy_t)(extent_hooks_t *, void *, size_t, bool, + unsigned); /* * bool - * chunk_commit(void *chunk, size_t size, size_t offset, size_t length, - * unsigned arena_ind); + * extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t offset, size_t length, unsigned arena_ind); */ -typedef bool (chunk_commit_t)(void *, size_t, size_t, size_t, unsigned); +typedef bool (extent_commit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, + unsigned); /* * bool - * chunk_decommit(void *chunk, size_t size, size_t offset, size_t length, - * unsigned arena_ind); + * extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t offset, size_t length, unsigned arena_ind); */ -typedef bool (chunk_decommit_t)(void *, size_t, size_t, size_t, unsigned); +typedef bool (extent_decommit_t)(extent_hooks_t *, void *, size_t, size_t, + size_t, unsigned); /* * bool - * chunk_purge(void *chunk, size_t size, size_t offset, size_t length, - * unsigned arena_ind); + * extent_purge(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t offset, size_t length, unsigned arena_ind); */ -typedef bool (chunk_purge_t)(void *, size_t, size_t, size_t, unsigned); +typedef bool (extent_purge_t)(extent_hooks_t *, void *, size_t, size_t, size_t, + unsigned); /* * bool - * chunk_split(void *chunk, size_t size, size_t size_a, size_t size_b, - * bool committed, unsigned arena_ind); + * extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t size_a, size_t size_b, bool committed, unsigned arena_ind); */ -typedef bool (chunk_split_t)(void *, size_t, size_t, size_t, bool, unsigned); +typedef bool (extent_split_t)(extent_hooks_t *, void *, size_t, size_t, size_t, + bool, unsigned); /* * bool - * chunk_merge(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, - * bool committed, unsigned arena_ind); + * extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + * void *addr_b, size_t size_b, bool committed, unsigned arena_ind); */ -typedef bool (chunk_merge_t)(void *, size_t, void *, size_t, bool, unsigned); - -typedef struct { - chunk_alloc_t *alloc; - chunk_dalloc_t *dalloc; - chunk_commit_t *commit; - chunk_decommit_t *decommit; - chunk_purge_t *purge; - chunk_split_t *split; - chunk_merge_t *merge; -} chunk_hooks_t; +typedef bool (extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t, + bool, unsigned); + +struct extent_hooks_s { + extent_alloc_t *alloc; + extent_dalloc_t *dalloc; + extent_destroy_t *destroy; + extent_commit_t *commit; + extent_decommit_t *decommit; + extent_purge_t *purge_lazy; + extent_purge_t *purge_forced; + extent_split_t *split; + extent_merge_t *merge; +}; |
