Diffstat (limited to 'include/jemalloc')
-rw-r--r--  include/jemalloc/internal/arena_externs.h  17
-rw-r--r--  include/jemalloc/internal/arena_inlines_b.h  7
-rw-r--r--  include/jemalloc/internal/arena_stats.h  237
-rw-r--r--  include/jemalloc/internal/arena_structs_b.h  85
-rw-r--r--  include/jemalloc/internal/arena_types.h  2
-rw-r--r--  include/jemalloc/internal/base_externs.h  5
-rw-r--r--  include/jemalloc/internal/base_inlines.h  4
-rw-r--r--  include/jemalloc/internal/base_structs.h  4
-rw-r--r--  include/jemalloc/internal/base_types.h  26
-rw-r--r--  include/jemalloc/internal/bin.h  106
-rw-r--r--  include/jemalloc/internal/bin_stats.h  51
-rw-r--r--  include/jemalloc/internal/cache_bin.h  114
-rw-r--r--  include/jemalloc/internal/ctl.h  5
-rw-r--r--  include/jemalloc/internal/div.h  41
-rw-r--r--  include/jemalloc/internal/extent_externs.h  9
-rw-r--r--  include/jemalloc/internal/extent_inlines.h  34
-rw-r--r--  include/jemalloc/internal/extent_structs.h  102
-rw-r--r--  include/jemalloc/internal/extent_types.h  8
-rw-r--r--  include/jemalloc/internal/hash.h  28
-rw-r--r--  include/jemalloc/internal/jemalloc_internal_defs.h.in  27
-rw-r--r--  include/jemalloc/internal/jemalloc_internal_inlines_a.h  9
-rw-r--r--  include/jemalloc/internal/jemalloc_internal_inlines_c.h  27
-rw-r--r--  include/jemalloc/internal/jemalloc_internal_macros.h  3
-rw-r--r--  include/jemalloc/internal/jemalloc_internal_types.h  11
-rw-r--r--  include/jemalloc/internal/jemalloc_preamble.h.in  22
-rw-r--r--  include/jemalloc/internal/log.h  115
-rw-r--r--  include/jemalloc/internal/malloc_io.h  4
-rw-r--r--  include/jemalloc/internal/mutex_prof.h  31
-rw-r--r--  include/jemalloc/internal/pages.h  5
-rw-r--r--  include/jemalloc/internal/rtree.h  12
-rw-r--r--  include/jemalloc/internal/rtree_tsd.h  2
-rw-r--r--  include/jemalloc/internal/spin.h  20
-rw-r--r--  include/jemalloc/internal/stats.h  134
-rw-r--r--  include/jemalloc/internal/stats_tsd.h  12
-rw-r--r--  include/jemalloc/internal/sz.h  4
-rw-r--r--  include/jemalloc/internal/tcache_externs.h  8
-rw-r--r--  include/jemalloc/internal/tcache_inlines.h  95
-rw-r--r--  include/jemalloc/internal/tcache_structs.h  71
-rw-r--r--  include/jemalloc/internal/tcache_types.h  5
-rw-r--r--  include/jemalloc/internal/tsd.h  2
-rw-r--r--  include/jemalloc/internal/witness.h  2
-rwxr-xr-x  include/jemalloc/jemalloc_mangle.sh  2
42 files changed, 1069 insertions(+), 439 deletions(-)
diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h
index af16d15..4b3732b 100644
--- a/include/jemalloc/internal/arena_externs.h
+++ b/include/jemalloc/internal/arena_externs.h
@@ -1,6 +1,7 @@
#ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H
#define JEMALLOC_INTERNAL_ARENA_EXTERNS_H
+#include "jemalloc/internal/bin.h"
#include "jemalloc/internal/extent_dss.h"
#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/size_classes.h"
@@ -9,25 +10,19 @@
extern ssize_t opt_dirty_decay_ms;
extern ssize_t opt_muzzy_decay_ms;
-extern const arena_bin_info_t arena_bin_info[NBINS];
-
extern percpu_arena_mode_t opt_percpu_arena;
extern const char *percpu_arena_mode_names[];
extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];
extern malloc_mutex_t arenas_lock;
-void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
- szind_t szind, uint64_t nrequests);
-void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
- size_t size);
void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena,
unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms,
ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy);
void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
- malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats);
+ bin_stats_t *bstats, arena_stats_large_t *lstats);
void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena,
extent_hooks_t **r_extent_hooks, extent_t *extent);
#ifdef JEMALLOC_JET
@@ -50,11 +45,11 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
void arena_reset(tsd_t *tsd, arena_t *arena);
void arena_destroy(tsd_t *tsd, arena_t *arena);
void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
- tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
-void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info,
+ cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
+void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info,
bool zero);
-typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *);
+typedef void (arena_dalloc_junk_small_t)(void *, const bin_info_t *);
extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small;
void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
@@ -77,6 +72,8 @@ ssize_t arena_dirty_decay_ms_default_get(void);
bool arena_dirty_decay_ms_default_set(ssize_t decay_ms);
ssize_t arena_muzzy_decay_ms_default_get(void);
bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms);
+bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena,
+ size_t *old_limit, size_t *new_limit);
unsigned arena_nthreads_get(arena_t *arena, bool internal);
void arena_nthreads_inc(arena_t *arena, bool internal);
void arena_nthreads_dec(arena_t *arena, bool internal);
diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h
index 003abe1..7b10d9e 100644
--- a/include/jemalloc/internal/arena_inlines_b.h
+++ b/include/jemalloc/internal/arena_inlines_b.h
@@ -8,13 +8,6 @@
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/ticker.h"
-static inline szind_t
-arena_bin_index(arena_t *arena, arena_bin_t *bin) {
- szind_t binind = (szind_t)(bin - arena->bins);
- assert(binind < NBINS);
- return binind;
-}
-
JEMALLOC_ALWAYS_INLINE prof_tctx_t *
arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
cassert(config_prof);
diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h
new file mode 100644
index 0000000..837d4eb
--- /dev/null
+++ b/include/jemalloc/internal/arena_stats.h
@@ -0,0 +1,237 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_STATS_H
+#define JEMALLOC_INTERNAL_ARENA_STATS_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mutex_prof.h"
+#include "jemalloc/internal/size_classes.h"
+
+/*
+ * On architectures that support 64-bit atomics, we use atomic updates for
+ * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize
+ * externally.
+ */
+#ifdef JEMALLOC_ATOMIC_U64
+typedef atomic_u64_t arena_stats_u64_t;
+#else
+/* Must hold the arena stats mutex while reading atomically. */
+typedef uint64_t arena_stats_u64_t;
+#endif
+
+typedef struct arena_stats_large_s arena_stats_large_t;
+struct arena_stats_large_s {
+ /*
+ * Total number of allocation/deallocation requests served directly by
+ * the arena.
+ */
+ arena_stats_u64_t nmalloc;
+ arena_stats_u64_t ndalloc;
+
+ /*
+ * Number of allocation requests that correspond to this size class.
+ * This includes requests served by tcache, though tcache only
+ * periodically merges into this counter.
+ */
+ arena_stats_u64_t nrequests; /* Partially derived. */
+
+ /* Current number of allocations of this size class. */
+ size_t curlextents; /* Derived. */
+};
+
+typedef struct arena_stats_decay_s arena_stats_decay_t;
+struct arena_stats_decay_s {
+ /* Total number of purge sweeps. */
+ arena_stats_u64_t npurge;
+ /* Total number of madvise calls made. */
+ arena_stats_u64_t nmadvise;
+ /* Total number of pages purged. */
+ arena_stats_u64_t purged;
+};
+
+/*
+ * Arena stats. Note that fields marked "derived" are not directly maintained
+ * within the arena code; rather their values are derived during stats merge
+ * requests.
+ */
+typedef struct arena_stats_s arena_stats_t;
+struct arena_stats_s {
+#ifndef JEMALLOC_ATOMIC_U64
+ malloc_mutex_t mtx;
+#endif
+
+ /* Number of bytes currently mapped, excluding retained memory. */
+ atomic_zu_t mapped; /* Partially derived. */
+
+ /*
+ * Number of unused virtual memory bytes currently retained. Retained
+ * bytes are technically mapped (though always decommitted or purged),
+ * but they are excluded from the mapped statistic (above).
+ */
+ atomic_zu_t retained; /* Derived. */
+
+ arena_stats_decay_t decay_dirty;
+ arena_stats_decay_t decay_muzzy;
+
+ atomic_zu_t base; /* Derived. */
+ atomic_zu_t internal;
+ atomic_zu_t resident; /* Derived. */
+ atomic_zu_t metadata_thp;
+
+ atomic_zu_t allocated_large; /* Derived. */
+ arena_stats_u64_t nmalloc_large; /* Derived. */
+ arena_stats_u64_t ndalloc_large; /* Derived. */
+ arena_stats_u64_t nrequests_large; /* Derived. */
+
+ /* Number of bytes cached in tcache associated with this arena. */
+ atomic_zu_t tcache_bytes; /* Derived. */
+
+ mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes];
+
+ /* One element for each large size class. */
+ arena_stats_large_t lstats[NSIZES - NBINS];
+
+ /* Arena uptime. */
+ nstime_t uptime;
+};
+
+static inline bool
+arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) {
+ if (config_debug) {
+ for (size_t i = 0; i < sizeof(arena_stats_t); i++) {
+ assert(((char *)arena_stats)[i] == 0);
+ }
+ }
+#ifndef JEMALLOC_ATOMIC_U64
+ if (malloc_mutex_init(&arena_stats->mtx, "arena_stats",
+ WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+#endif
+ /* Memory is zeroed, so there is no need to clear stats. */
+ return false;
+}
+
+static inline void
+arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) {
+#ifndef JEMALLOC_ATOMIC_U64
+ malloc_mutex_lock(tsdn, &arena_stats->mtx);
+#endif
+}
+
+static inline void
+arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) {
+#ifndef JEMALLOC_ATOMIC_U64
+ malloc_mutex_unlock(tsdn, &arena_stats->mtx);
+#endif
+}
+
+static inline uint64_t
+arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
+ arena_stats_u64_t *p) {
+#ifdef JEMALLOC_ATOMIC_U64
+ return atomic_load_u64(p, ATOMIC_RELAXED);
+#else
+ malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+ return *p;
+#endif
+}
+
+static inline void
+arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
+ arena_stats_u64_t *p, uint64_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+ atomic_fetch_add_u64(p, x, ATOMIC_RELAXED);
+#else
+ malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+ *p += x;
+#endif
+}
+
+UNUSED static inline void
+arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
+ arena_stats_u64_t *p, uint64_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+ UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED);
+ assert(r - x <= r);
+#else
+ malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+ *p -= x;
+ assert(*p + x >= *p);
+#endif
+}
+
+/*
+ * Non-atomically sets *dst += src. *dst needs external synchronization.
+ * This lets us avoid the cost of a fetch_add when it's unnecessary (note that
+ * the types here are atomic).
+ */
+static inline void
+arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) {
+#ifdef JEMALLOC_ATOMIC_U64
+ uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED);
+ atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED);
+#else
+ *dst += src;
+#endif
+}
+
+static inline size_t
+arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) {
+#ifdef JEMALLOC_ATOMIC_U64
+ return atomic_load_zu(p, ATOMIC_RELAXED);
+#else
+ malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+ return atomic_load_zu(p, ATOMIC_RELAXED);
+#endif
+}
+
+static inline void
+arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p,
+ size_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+ atomic_fetch_add_zu(p, x, ATOMIC_RELAXED);
+#else
+ malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+ size_t cur = atomic_load_zu(p, ATOMIC_RELAXED);
+ atomic_store_zu(p, cur + x, ATOMIC_RELAXED);
+#endif
+}
+
+static inline void
+arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p,
+ size_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+ UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED);
+ assert(r - x <= r);
+#else
+ malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+ size_t cur = atomic_load_zu(p, ATOMIC_RELAXED);
+ atomic_store_zu(p, cur - x, ATOMIC_RELAXED);
+#endif
+}
+
+/* Like the _u64 variant, needs an externally synchronized *dst. */
+static inline void
+arena_stats_accum_zu(atomic_zu_t *dst, size_t src) {
+ size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED);
+ atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED);
+}
+
+static inline void
+arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
+ szind_t szind, uint64_t nrequests) {
+ arena_stats_lock(tsdn, arena_stats);
+ arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind -
+ NBINS].nrequests, nrequests);
+ arena_stats_unlock(tsdn, arena_stats);
+}
+
+static inline void
+arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) {
+ arena_stats_lock(tsdn, arena_stats);
+ arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size);
+ arena_stats_unlock(tsdn, arena_stats);
+}
+
+
+#endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */
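A minimal usage sketch of the pattern these helpers expect (the wrapper function and the particular counters it touches are illustrative, not part of this change): take the stats lock, update through the accessors so that both the 64-bit-atomic and mutex-backed builds stay correct, then unlock — the same shape as arena_stats_large_nrequests_add above.

/* Hypothetical caller; mirrors the helpers defined in arena_stats.h. */
static void
example_record_large_malloc(tsdn_t *tsdn, arena_stats_t *arena_stats,
    szind_t szind, size_t mapped_bytes) {
	arena_stats_lock(tsdn, arena_stats);
	arena_stats_add_u64(tsdn, arena_stats,
	    &arena_stats->lstats[szind - NBINS].nmalloc, 1);
	arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped,
	    mapped_bytes);
	arena_stats_unlock(tsdn, arena_stats);
}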
diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h
index d1fffec..38bc959 100644
--- a/include/jemalloc/internal/arena_structs_b.h
+++ b/include/jemalloc/internal/arena_structs_b.h
@@ -1,7 +1,9 @@
#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H
#define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H
+#include "jemalloc/internal/arena_stats.h"
#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/bin.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/extent_dss.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
@@ -10,45 +12,8 @@
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/smoothstep.h"
-#include "jemalloc/internal/stats.h"
#include "jemalloc/internal/ticker.h"
-/*
- * Read-only information associated with each element of arena_t's bins array
- * is stored separately, partly to reduce memory usage (only one copy, rather
- * than one per arena), but mainly to avoid false cacheline sharing.
- *
- * Each slab has the following layout:
- *
- * /--------------------\
- * | region 0 |
- * |--------------------|
- * | region 1 |
- * |--------------------|
- * | ... |
- * | ... |
- * | ... |
- * |--------------------|
- * | region nregs-1 |
- * \--------------------/
- */
-struct arena_bin_info_s {
- /* Size of regions in a slab for this bin's size class. */
- size_t reg_size;
-
- /* Total size of a slab for this bin's size class. */
- size_t slab_size;
-
- /* Total number of regions in a slab for this bin's size class. */
- uint32_t nregs;
-
- /*
- * Metadata used to manipulate bitmaps for slabs associated with this
- * bin.
- */
- bitmap_info_t bitmap_info;
-};
-
struct arena_decay_s {
/* Synchronizes all non-atomic fields. */
malloc_mutex_t mtx;
@@ -104,37 +69,11 @@ struct arena_decay_s {
* arena and ctl code.
*
* Synchronization: Same as associated arena's stats field. */
- decay_stats_t *stats;
+ arena_stats_decay_t *stats;
/* Peak number of pages in associated extents. Used for debug only. */
uint64_t ceil_npages;
};
-struct arena_bin_s {
- /* All operations on arena_bin_t fields require lock ownership. */
- malloc_mutex_t lock;
-
- /*
- * Current slab being used to service allocations of this bin's size
- * class. slabcur is independent of slabs_{nonfull,full}; whenever
- * slabcur is reassigned, the previous slab must be deallocated or
- * inserted into slabs_{nonfull,full}.
- */
- extent_t *slabcur;
-
- /*
- * Heap of non-full slabs. This heap is used to assure that new
- * allocations come from the non-full slab that is oldest/lowest in
- * memory.
- */
- extent_heap_t slabs_nonfull;
-
- /* List used to track full slabs. */
- extent_list_t slabs_full;
-
- /* Bin statistics. */
- malloc_bin_stats_t stats;
-};
-
struct arena_s {
/*
* Number of threads currently assigned to this arena. Each thread has
@@ -162,14 +101,15 @@ struct arena_s {
arena_stats_t stats;
/*
- * List of tcaches for extant threads associated with this arena.
- * Stats from these are merged incrementally, and at exit if
- * opt_stats_print is enabled.
+ * Lists of tcaches and cache_bin_array_descriptors for extant threads
+ * associated with this arena. Stats from these are merged
+ * incrementally, and at exit if opt_stats_print is enabled.
*
* Synchronization: tcache_ql_mtx.
*/
- ql_head(tcache_t) tcache_ql;
- malloc_mutex_t tcache_ql_mtx;
+ ql_head(tcache_t) tcache_ql;
+ ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql;
+ malloc_mutex_t tcache_ql_mtx;
/* Synchronization: internal. */
prof_accum_t prof_accum;
@@ -239,9 +179,14 @@ struct arena_s {
* be effective even if multiple arenas' extent allocation requests are
* highly interleaved.
*
+ * retain_grow_limit is the max allowed size ind to expand (unless the
+ * required size is greater). Default is no limit, and controlled
+ * through mallctl only.
+ *
* Synchronization: extent_grow_mtx
*/
pszind_t extent_grow_next;
+ pszind_t retain_grow_limit;
malloc_mutex_t extent_grow_mtx;
/*
@@ -258,7 +203,7 @@ struct arena_s {
*
* Synchronization: internal.
*/
- arena_bin_t bins[NBINS];
+ bin_t bins[NBINS];
/*
* Base allocator, from which arena metadata are allocated.
diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h
index a691bd8..70001b5 100644
--- a/include/jemalloc/internal/arena_types.h
+++ b/include/jemalloc/internal/arena_types.h
@@ -12,9 +12,7 @@
#define DECAY_NTICKS_PER_UPDATE 1000
typedef struct arena_slab_data_s arena_slab_data_t;
-typedef struct arena_bin_info_s arena_bin_info_t;
typedef struct arena_decay_s arena_decay_t;
-typedef struct arena_bin_s arena_bin_t;
typedef struct arena_s arena_t;
typedef struct arena_tdata_s arena_tdata_t;
typedef struct alloc_ctx_s alloc_ctx_t;
diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h
index a4fd5ac..7b705c9 100644
--- a/include/jemalloc/internal/base_externs.h
+++ b/include/jemalloc/internal/base_externs.h
@@ -1,6 +1,9 @@
#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H
#define JEMALLOC_INTERNAL_BASE_EXTERNS_H
+extern metadata_thp_mode_t opt_metadata_thp;
+extern const char *metadata_thp_mode_names[];
+
base_t *b0get(void);
base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
void base_delete(tsdn_t *tsdn, base_t *base);
@@ -10,7 +13,7 @@ extent_hooks_t *base_extent_hooks_set(base_t *base,
void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment);
extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base);
void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated,
- size_t *resident, size_t *mapped);
+ size_t *resident, size_t *mapped, size_t *n_thp);
void base_prefork(tsdn_t *tsdn, base_t *base);
void base_postfork_parent(tsdn_t *tsdn, base_t *base);
void base_postfork_child(tsdn_t *tsdn, base_t *base);
diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h
index 931560b..aec0e2e 100644
--- a/include/jemalloc/internal/base_inlines.h
+++ b/include/jemalloc/internal/base_inlines.h
@@ -6,4 +6,8 @@ base_ind_get(const base_t *base) {
return base->ind;
}
+static inline bool
+metadata_thp_enabled(void) {
+ return (opt_metadata_thp != metadata_thp_disabled);
+}
#endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */
diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h
index 18e227b..2102247 100644
--- a/include/jemalloc/internal/base_structs.h
+++ b/include/jemalloc/internal/base_structs.h
@@ -30,6 +30,8 @@ struct base_s {
/* Protects base_alloc() and base_stats_get() operations. */
malloc_mutex_t mtx;
+ /* Using THP when true (metadata_thp auto mode). */
+ bool auto_thp_switched;
/*
* Most recent size class in the series of increasingly large base
* extents. Logarithmic spacing between subsequent allocations ensures
@@ -50,6 +52,8 @@ struct base_s {
size_t allocated;
size_t resident;
size_t mapped;
+ /* Number of THP regions touched. */
+ size_t n_thp;
};
#endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */
diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h
index be7ee82..b6db77d 100644
--- a/include/jemalloc/internal/base_types.h
+++ b/include/jemalloc/internal/base_types.h
@@ -4,4 +4,30 @@
typedef struct base_block_s base_block_t;
typedef struct base_s base_t;
+#define METADATA_THP_DEFAULT metadata_thp_disabled
+
+/*
+ * In auto mode, arenas switch to huge pages for the base allocator on the
+ * second base block. a0 switches to THP on the 5th block (after 20 megabytes
+ * of metadata), since more metadata (e.g. rtree nodes) comes from a0's base.
+ */
+
+#define BASE_AUTO_THP_THRESHOLD 2
+#define BASE_AUTO_THP_THRESHOLD_A0 5
+
+typedef enum {
+ metadata_thp_disabled = 0,
+ /*
+ * Lazily enable hugepages for metadata. To avoid high RSS caused by THP
+ * combined with a low-usage arena (i.e. where THP overhead becomes a
+ * significant percentage), the "auto" option only starts using THP after
+ * a base allocator has used up its first THP region. Starting from the
+ * second hugepage (in a single
+ * arena), "auto" behaves the same as "always", i.e. madvise hugepage
+ * right away.
+ */
+ metadata_thp_auto = 1,
+ metadata_thp_always = 2,
+ metadata_thp_mode_limit = 3
+} metadata_thp_mode_t;
+
#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */
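A hypothetical sketch (not part of this change) of how the thresholds above gate the switch in "auto" mode, using only the constants and the opt_metadata_thp option introduced in this diff:

/* Hypothetical: should this base allocator start madvising THP yet? */
static bool
example_base_auto_thp_triggered(bool is_a0, size_t n_blocks) {
	if (opt_metadata_thp != metadata_thp_auto) {
		return opt_metadata_thp == metadata_thp_always;
	}
	size_t threshold = is_a0 ?
	    BASE_AUTO_THP_THRESHOLD_A0 : BASE_AUTO_THP_THRESHOLD;
	return n_blocks >= threshold;
}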
diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h
new file mode 100644
index 0000000..9b416ad
--- /dev/null
+++ b/include/jemalloc/internal/bin.h
@@ -0,0 +1,106 @@
+#ifndef JEMALLOC_INTERNAL_BIN_H
+#define JEMALLOC_INTERNAL_BIN_H
+
+#include "jemalloc/internal/extent_types.h"
+#include "jemalloc/internal/extent_structs.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/bin_stats.h"
+
+/*
+ * A bin contains a set of extents that are currently being used for slab
+ * allocations.
+ */
+
+/*
+ * Read-only information associated with each element of arena_t's bins array
+ * is stored separately, partly to reduce memory usage (only one copy, rather
+ * than one per arena), but mainly to avoid false cacheline sharing.
+ *
+ * Each slab has the following layout:
+ *
+ * /--------------------\
+ * | region 0 |
+ * |--------------------|
+ * | region 1 |
+ * |--------------------|
+ * | ... |
+ * | ... |
+ * | ... |
+ * |--------------------|
+ * | region nregs-1 |
+ * \--------------------/
+ */
+typedef struct bin_info_s bin_info_t;
+struct bin_info_s {
+ /* Size of regions in a slab for this bin's size class. */
+ size_t reg_size;
+
+ /* Total size of a slab for this bin's size class. */
+ size_t slab_size;
+
+ /* Total number of regions in a slab for this bin's size class. */
+ uint32_t nregs;
+
+ /*
+ * Metadata used to manipulate bitmaps for slabs associated with this
+ * bin.
+ */
+ bitmap_info_t bitmap_info;
+};
+
+extern const bin_info_t bin_infos[NBINS];
+
+
+typedef struct bin_s bin_t;
+struct bin_s {
+ /* All operations on bin_t fields require lock ownership. */
+ malloc_mutex_t lock;
+
+ /*
+ * Current slab being used to service allocations of this bin's size
+ * class. slabcur is independent of slabs_{nonfull,full}; whenever
+ * slabcur is reassigned, the previous slab must be deallocated or
+ * inserted into slabs_{nonfull,full}.
+ */
+ extent_t *slabcur;
+
+ /*
+ * Heap of non-full slabs. This heap is used to assure that new
+ * allocations come from the non-full slab that is oldest/lowest in
+ * memory.
+ */
+ extent_heap_t slabs_nonfull;
+
+ /* List used to track full slabs. */
+ extent_list_t slabs_full;
+
+ /* Bin statistics. */
+ bin_stats_t stats;
+};
+
+/* Initializes a bin to empty. Returns true on error. */
+bool bin_init(bin_t *bin);
+
+/* Forking. */
+void bin_prefork(tsdn_t *tsdn, bin_t *bin);
+void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin);
+void bin_postfork_child(tsdn_t *tsdn, bin_t *bin);
+
+/* Stats. */
+static inline void
+bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) {
+ malloc_mutex_lock(tsdn, &bin->lock);
+ malloc_mutex_prof_read(tsdn, &dst_bin_stats->mutex_data, &bin->lock);
+ dst_bin_stats->nmalloc += bin->stats.nmalloc;
+ dst_bin_stats->ndalloc += bin->stats.ndalloc;
+ dst_bin_stats->nrequests += bin->stats.nrequests;
+ dst_bin_stats->curregs += bin->stats.curregs;
+ dst_bin_stats->nfills += bin->stats.nfills;
+ dst_bin_stats->nflushes += bin->stats.nflushes;
+ dst_bin_stats->nslabs += bin->stats.nslabs;
+ dst_bin_stats->reslabs += bin->stats.reslabs;
+ dst_bin_stats->curslabs += bin->stats.curslabs;
+ malloc_mutex_unlock(tsdn, &bin->lock);
+}
+
+#endif /* JEMALLOC_INTERNAL_BIN_H */
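A minimal sketch of a caller folding every bin of an arena into a destination array via bin_stats_merge (the wrapper itself is hypothetical; bins[NBINS] is the array declared in arena_structs_b.h in this change):

/* Hypothetical: merge per-bin stats for one arena into dst. */
static void
example_merge_arena_bin_stats(tsdn_t *tsdn, arena_t *arena,
    bin_stats_t dst[NBINS]) {
	for (unsigned i = 0; i < NBINS; i++) {
		bin_stats_merge(tsdn, &dst[i], &arena->bins[i]);
	}
}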
diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h
new file mode 100644
index 0000000..86e673e
--- /dev/null
+++ b/include/jemalloc/internal/bin_stats.h
@@ -0,0 +1,51 @@
+#ifndef JEMALLOC_INTERNAL_BIN_STATS_H
+#define JEMALLOC_INTERNAL_BIN_STATS_H
+
+#include "jemalloc/internal/mutex_prof.h"
+
+typedef struct bin_stats_s bin_stats_t;
+struct bin_stats_s {
+ /*
+ * Total number of allocation/deallocation requests served directly by
+ * the bin. Note that tcache may allocate an object, then recycle it
+ * many times, resulting in many increments to nrequests, but only one
+ * each to nmalloc and ndalloc.
+ */
+ uint64_t nmalloc;
+ uint64_t ndalloc;
+
+ /*
+ * Number of allocation requests that correspond to the size of this
+ * bin. This includes requests served by tcache, though tcache only
+ * periodically merges into this counter.
+ */
+ uint64_t nrequests;
+
+ /*
+ * Current number of regions of this size class, including regions
+ * currently cached by tcache.
+ */
+ size_t curregs;
+
+ /* Number of tcache fills from this bin. */
+ uint64_t nfills;
+
+ /* Number of tcache flushes to this bin. */
+ uint64_t nflushes;
+
+ /* Total number of slabs created for this bin's size class. */
+ uint64_t nslabs;
+
+ /*
+ * Total number of slabs reused by extracting them from the slabs heap
+ * for this bin's size class.
+ */
+ uint64_t reslabs;
+
+ /* Current number of slabs in this bin. */
+ size_t curslabs;
+
+ mutex_prof_data_t mutex_data;
+};
+
+#endif /* JEMALLOC_INTERNAL_BIN_STATS_H */
diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h
new file mode 100644
index 0000000..12f3ef2
--- /dev/null
+++ b/include/jemalloc/internal/cache_bin.h
@@ -0,0 +1,114 @@
+#ifndef JEMALLOC_INTERNAL_CACHE_BIN_H
+#define JEMALLOC_INTERNAL_CACHE_BIN_H
+
+#include "jemalloc/internal/ql.h"
+
+/*
+ * The cache_bins are the mechanism that the tcache and the arena use to
+ * communicate. The tcache fills from and flushes to the arena by passing a
+ * cache_bin_t to fill/flush. When the arena needs to pull stats from the
+ * tcaches associated with it, it does so by iterating over its
+ * cache_bin_array_descriptor_t objects and reading out the per-bin stats they
+ * contain. This makes it so that the arena need not know about the existence
+ * of the tcache at all.
+ */
+
+
+/*
+ * The count of the number of cached allocations in a bin. We make this signed
+ * so that negative numbers can encode "invalid" states (e.g. a low water mark
+ * of -1 for a cache that has been depleted).
+ */
+typedef int32_t cache_bin_sz_t;
+
+typedef struct cache_bin_stats_s cache_bin_stats_t;
+struct cache_bin_stats_s {
+ /*
+ * Number of allocation requests that corresponded to the size of this
+ * bin.
+ */
+ uint64_t nrequests;
+};
+
+/*
+ * Read-only information associated with each element of tcache_t's tbins array
+ * is stored separately, mainly to reduce memory usage.
+ */
+typedef struct cache_bin_info_s cache_bin_info_t;
+struct cache_bin_info_s {
+ /* Upper limit on ncached. */
+ cache_bin_sz_t ncached_max;
+};
+
+typedef struct cache_bin_s cache_bin_t;
+struct cache_bin_s {
+ /* Min # cached since last GC. */
+ cache_bin_sz_t low_water;
+ /* # of cached objects. */
+ cache_bin_sz_t ncached;
+ /*
+ * ncached and stats are both modified frequently. Let's keep them
+ * close so that they have a higher chance of being on the same
+ * cacheline, and thus fewer write-backs.
+ */
+ cache_bin_stats_t tstats;
+ /*
+ * Stack of available objects.
+ *
+ * To make use of adjacent cacheline prefetch, the items in the avail
+ * stack go to higher addresses for newer allocations. avail points
+ * just above the available space, which means that
+ * avail[-ncached, ... -1] are available items and the lowest item will
+ * be allocated first.
+ */
+ void **avail;
+};
+
+typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t;
+struct cache_bin_array_descriptor_s {
+ /*
+ * The arena keeps a list of the cache bins associated with it, for
+ * stats collection.
+ */
+ ql_elm(cache_bin_array_descriptor_t) link;
+ /* Pointers to the tcache bins. */
+ cache_bin_t *bins_small;
+ cache_bin_t *bins_large;
+};
+
+static inline void
+cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
+ cache_bin_t *bins_small, cache_bin_t *bins_large) {
+ ql_elm_new(descriptor, link);
+ descriptor->bins_small = bins_small;
+ descriptor->bins_large = bins_large;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+cache_bin_alloc_easy(cache_bin_t *bin, bool *success) {
+ void *ret;
+
+ if (unlikely(bin->ncached == 0)) {
+ bin->low_water = -1;
+ *success = false;
+ return NULL;
+ }
+ /*
+ * success (instead of ret) should be checked upon the return of this
+ * function. We avoid checking (ret == NULL) because there is never a
+ * null stored on the avail stack (which is unknown to the compiler),
+ * and eagerly checking ret would cause a pipeline stall (waiting for the
+ * cacheline).
+ */
+ *success = true;
+ ret = *(bin->avail - bin->ncached);
+ bin->ncached--;
+
+ if (unlikely(bin->ncached < bin->low_water)) {
+ bin->low_water = bin->ncached;
+ }
+
+ return ret;
+}
+
+#endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */
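A sketch of the calling convention: the success flag, not the returned pointer, is what signals a fast-path hit (the wrapper is hypothetical; tcache_small_bin_get is the accessor updated in jemalloc_internal_inlines_a.h below):

/* Hypothetical fast-path wrapper. */
static void *
example_tcache_alloc_small_fast(tcache_t *tcache, szind_t binind) {
	cache_bin_t *bin = tcache_small_bin_get(tcache, binind);
	bool success;
	void *ret = cache_bin_alloc_easy(bin, &success);
	if (!success) {
		return NULL;	/* Caller falls back to the slow path. */
	}
	return ret;
}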
diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h
index a91c4cf..d927d94 100644
--- a/include/jemalloc/internal/ctl.h
+++ b/include/jemalloc/internal/ctl.h
@@ -40,14 +40,15 @@ typedef struct ctl_arena_stats_s {
uint64_t ndalloc_small;
uint64_t nrequests_small;
- malloc_bin_stats_t bstats[NBINS];
- malloc_large_stats_t lstats[NSIZES - NBINS];
+ bin_stats_t bstats[NBINS];
+ arena_stats_large_t lstats[NSIZES - NBINS];
} ctl_arena_stats_t;
typedef struct ctl_stats_s {
size_t allocated;
size_t active;
size_t metadata;
+ size_t metadata_thp;
size_t resident;
size_t mapped;
size_t retained;
diff --git a/include/jemalloc/internal/div.h b/include/jemalloc/internal/div.h
new file mode 100644
index 0000000..aebae93
--- /dev/null
+++ b/include/jemalloc/internal/div.h
@@ -0,0 +1,41 @@
+#ifndef JEMALLOC_INTERNAL_DIV_H
+#define JEMALLOC_INTERNAL_DIV_H
+
+#include "jemalloc/internal/assert.h"
+
+/*
+ * This module does the division that computes the index of a region in a slab,
+ * given its offset relative to the base.
+ * That is, given a divisor d and an n = i * d (all integers), we'll return i.
+ * We do some pre-computation to do this more quickly than a CPU division
+ * instruction.
+ * We bound n < 2^32, and don't support dividing by one.
+ */
+
+typedef struct div_info_s div_info_t;
+struct div_info_s {
+ uint32_t magic;
+#ifdef JEMALLOC_DEBUG
+ size_t d;
+#endif
+};
+
+void div_init(div_info_t *div_info, size_t divisor);
+
+static inline size_t
+div_compute(div_info_t *div_info, size_t n) {
+ assert(n <= (uint32_t)-1);
+ /*
+ * This generates, e.g. mov; imul; shr on x86-64. On a 32-bit machine,
+ * the compilers I tried were all smart enough to turn this into the
+ * appropriate "get the high 32 bits of the result of a multiply" (e.g.
+ * mul; mov edx eax; on x86, umull on arm, etc.).
+ */
+ size_t i = ((uint64_t)n * (uint64_t)div_info->magic) >> 32;
+#ifdef JEMALLOC_DEBUG
+ assert(i * div_info->d == n);
+#endif
+ return i;
+}
+
+#endif /* JEMALLOC_INTERNAL_DIV_H */
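A usage sketch: div_init() runs once with a bin's region size, and div_compute() then turns a pointer's byte offset within a slab into a region index without a hardware divide (the wrapper and its arguments are illustrative):

/* Hypothetical: region index of ptr within its slab, given a div_info_t
 * that was div_init()'d with the bin's reg_size. */
static size_t
example_slab_region_index(div_info_t *reg_size_div, const void *slab_base,
    const void *ptr) {
	size_t diff = (size_t)((uintptr_t)ptr - (uintptr_t)slab_base);
	return div_compute(reg_size_div, diff);
}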
diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h
index 489a813..b8a4d02 100644
--- a/include/jemalloc/internal/extent_externs.h
+++ b/include/jemalloc/internal/extent_externs.h
@@ -4,12 +4,13 @@
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mutex_pool.h"
#include "jemalloc/internal/ph.h"
-#include "jemalloc/internal/rb.h"
#include "jemalloc/internal/rtree.h"
-extern rtree_t extents_rtree;
-extern const extent_hooks_t extent_hooks_default;
-extern mutex_pool_t extent_mutex_pool;
+extern size_t opt_lg_extent_max_active_fit;
+
+extern rtree_t extents_rtree;
+extern const extent_hooks_t extent_hooks_default;
+extern mutex_pool_t extent_mutex_pool;
extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena);
void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent);
diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h
index bb2bd69..9b8ddc2 100644
--- a/include/jemalloc/internal/extent_inlines.h
+++ b/include/jemalloc/internal/extent_inlines.h
@@ -94,6 +94,12 @@ extent_committed_get(const extent_t *extent) {
}
static inline bool
+extent_dumpable_get(const extent_t *extent) {
+ return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >>
+ EXTENT_BITS_DUMPABLE_SHIFT);
+}
+
+static inline bool
extent_slab_get(const extent_t *extent) {
return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >>
EXTENT_BITS_SLAB_SHIFT);
@@ -190,9 +196,16 @@ extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) {
if (alignment < PAGE) {
unsigned lg_range = LG_PAGE -
lg_floor(CACHELINE_CEILING(alignment));
- size_t r =
- prng_lg_range_zu(&extent_arena_get(extent)->offset_state,
- lg_range, true);
+ size_t r;
+ if (!tsdn_null(tsdn)) {
+ tsd_t *tsd = tsdn_tsd(tsdn);
+ r = (size_t)prng_lg_range_u64(
+ tsd_offset_statep_get(tsd), lg_range);
+ } else {
+ r = prng_lg_range_zu(
+ &extent_arena_get(extent)->offset_state,
+ lg_range, true);
+ }
uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE -
lg_range);
extent->e_addr = (void *)((uintptr_t)extent->e_addr +
@@ -270,6 +283,12 @@ extent_committed_set(extent_t *extent, bool committed) {
}
static inline void
+extent_dumpable_set(extent_t *extent, bool dumpable) {
+ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) |
+ ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT);
+}
+
+static inline void
extent_slab_set(extent_t *extent, bool slab) {
extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) |
((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT);
@@ -283,7 +302,7 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) {
static inline void
extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size,
bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed,
- bool committed) {
+ bool committed, bool dumpable) {
assert(addr == PAGE_ADDR2BASE(addr) || !slab);
extent_arena_set(extent, arena);
@@ -295,6 +314,7 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size,
extent_state_set(extent, state);
extent_zeroed_set(extent, zeroed);
extent_committed_set(extent, committed);
+ extent_dumpable_set(extent, dumpable);
ql_elm_new(extent, ql_link);
if (config_prof) {
extent_prof_tctx_set(extent, NULL);
@@ -312,6 +332,7 @@ extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) {
extent_state_set(extent, extent_state_active);
extent_zeroed_set(extent, true);
extent_committed_set(extent, true);
+ extent_dumpable_set(extent, true);
}
static inline void
@@ -335,6 +356,11 @@ extent_list_append(extent_list_t *list, extent_t *extent) {
}
static inline void
+extent_list_prepend(extent_list_t *list, extent_t *extent) {
+ ql_head_insert(list, extent, ql_link);
+}
+
+static inline void
extent_list_replace(extent_list_t *list, extent_t *to_remove,
extent_t *to_insert) {
ql_after_insert(to_remove, to_insert, ql_link);
diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h
index d297950..4873b9e 100644
--- a/include/jemalloc/internal/extent_structs.h
+++ b/include/jemalloc/internal/extent_structs.h
@@ -5,7 +5,6 @@
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/ql.h"
-#include "jemalloc/internal/rb.h"
#include "jemalloc/internal/ph.h"
#include "jemalloc/internal/size_classes.h"
@@ -24,13 +23,14 @@ struct extent_s {
* a: arena_ind
* b: slab
* c: committed
+ * d: dumpable
* z: zeroed
* t: state
* i: szind
* f: nfree
* n: sn
*
- * nnnnnnnn ... nnnnnfff fffffffi iiiiiiit tzcbaaaa aaaaaaaa
+ * nnnnnnnn ... nnnnffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa
*
* arena_ind: Arena from which this extent came, or all 1 bits if
* unassociated.
@@ -45,6 +45,23 @@ struct extent_s {
* as on a system that overcommits and satisfies physical
* memory needs on demand via soft page faults.
*
+ * dumpable: The dumpable flag indicates whether or not we've set the
+ * memory in question to be dumpable. Note that this
+ * interacts somewhat subtly with user-specified extent hooks,
+ * since we don't know if *they* are fiddling with
+ * dumpability (in which case, we don't want to undo whatever
+ * they're doing). To deal with this scenario, we:
+ * - Make dumpable false only for memory allocated with the
+ * default hooks.
+ * - Only allow memory to go from non-dumpable to dumpable,
+ * and only once.
+ * - Never make the OS call to allow dumping when the
+ * dumpable bit is already set.
+ * These three constraints mean that we will never
+ * accidentally dump user memory that the user meant to set
+ * nondumpable with their extent hooks.
+ *
+ *
* zeroed: The zeroed flag is used by extent recycling code to track
* whether memory is zero-filled.
*
@@ -69,38 +86,42 @@ struct extent_s {
* serial number to both resulting adjacent extents.
*/
uint64_t e_bits;
-#define EXTENT_BITS_ARENA_SHIFT 0
-#define EXTENT_BITS_ARENA_MASK \
- (((uint64_t)(1U << MALLOCX_ARENA_BITS) - 1) << EXTENT_BITS_ARENA_SHIFT)
+#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT))
+
+#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS
+#define EXTENT_BITS_ARENA_SHIFT 0
+#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT)
-#define EXTENT_BITS_SLAB_SHIFT MALLOCX_ARENA_BITS
-#define EXTENT_BITS_SLAB_MASK \
- ((uint64_t)0x1U << EXTENT_BITS_SLAB_SHIFT)
+#define EXTENT_BITS_SLAB_WIDTH 1
+#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT)
+#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT)
-#define EXTENT_BITS_COMMITTED_SHIFT (MALLOCX_ARENA_BITS + 1)
-#define EXTENT_BITS_COMMITTED_MASK \
- ((uint64_t)0x1U << EXTENT_BITS_COMMITTED_SHIFT)
+#define EXTENT_BITS_COMMITTED_WIDTH 1
+#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT)
+#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT)
-#define EXTENT_BITS_ZEROED_SHIFT (MALLOCX_ARENA_BITS + 2)
-#define EXTENT_BITS_ZEROED_MASK \
- ((uint64_t)0x1U << EXTENT_BITS_ZEROED_SHIFT)
+#define EXTENT_BITS_DUMPABLE_WIDTH 1
+#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT)
+#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT)
-#define EXTENT_BITS_STATE_SHIFT (MALLOCX_ARENA_BITS + 3)
-#define EXTENT_BITS_STATE_MASK \
- ((uint64_t)0x3U << EXTENT_BITS_STATE_SHIFT)
+#define EXTENT_BITS_ZEROED_WIDTH 1
+#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT)
+#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT)
-#define EXTENT_BITS_SZIND_SHIFT (MALLOCX_ARENA_BITS + 5)
-#define EXTENT_BITS_SZIND_MASK \
- (((uint64_t)(1U << LG_CEIL_NSIZES) - 1) << EXTENT_BITS_SZIND_SHIFT)
+#define EXTENT_BITS_STATE_WIDTH 2
+#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT)
+#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT)
-#define EXTENT_BITS_NFREE_SHIFT \
- (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES)
-#define EXTENT_BITS_NFREE_MASK \
- ((uint64_t)((1U << (LG_SLAB_MAXREGS + 1)) - 1) << EXTENT_BITS_NFREE_SHIFT)
+#define EXTENT_BITS_SZIND_WIDTH LG_CEIL_NSIZES
+#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT)
+#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT)
-#define EXTENT_BITS_SN_SHIFT \
- (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES + (LG_SLAB_MAXREGS + 1))
-#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT)
+#define EXTENT_BITS_NFREE_WIDTH (LG_SLAB_MAXREGS + 1)
+#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT)
+#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT)
+
+#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT)
+#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT)
/* Pointer to the extent that this structure is responsible for. */
void *e_addr;
@@ -120,20 +141,19 @@ struct extent_s {
size_t e_bsize;
};
- union {
- /*
- * List linkage, used by a variety of lists:
- * - arena_bin_t's slabs_full
- * - extents_t's LRU
- * - stashed dirty extents
- * - arena's large allocations
- */
- ql_elm(extent_t) ql_link;
- /* Red-black tree linkage, used by arena's extent_avail. */
- rb_node(extent_t) rb_link;
- };
+ /*
+ * List linkage, used by a variety of lists:
+ * - bin_t's slabs_full
+ * - extents_t's LRU
+ * - stashed dirty extents
+ * - arena's large allocations
+ */
+ ql_elm(extent_t) ql_link;
- /* Linkage for per size class sn/address-ordered heaps. */
+ /*
+ * Linkage for per size class sn/address-ordered heaps, and
+ * for extent_avail
+ */
phn(extent_t) ph_link;
union {
@@ -148,7 +168,7 @@ struct extent_s {
};
};
typedef ql_head(extent_t) extent_list_t;
-typedef rb_tree(extent_t) extent_tree_t;
+typedef ph(extent_t) extent_tree_t;
typedef ph(extent_t) extent_heap_t;
/* Quantized collection of extents, with built-in LRU queue. */
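To illustrate the new width/shift scheme: each field's shift is the previous field's width plus shift, and MASK() builds the in-place mask, so accessors extract a field the same way as before (mirroring the extent_dumpable_get accessor added in extent_inlines.h above; the function below is illustrative only):

/* Illustrative only: extracting a packed field under the width/shift scheme. */
static inline extent_state_t
example_extent_state_get(const extent_t *extent) {
	return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >>
	    EXTENT_BITS_STATE_SHIFT);
}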
diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h
index b6905ce..c0561d9 100644
--- a/include/jemalloc/internal/extent_types.h
+++ b/include/jemalloc/internal/extent_types.h
@@ -6,4 +6,12 @@ typedef struct extents_s extents_t;
#define EXTENT_HOOKS_INITIALIZER NULL
+#define EXTENT_GROW_MAX_PIND (NPSIZES - 1)
+
+/*
+ * When reusing (and splitting) an active extent, (1U << opt_lg_extent_max_active_fit)
+ * is the max ratio between the size of the active extent and the new extent.
+ */
+#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6
+
#endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */
diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h
index 188296c..dcfc992 100644
--- a/include/jemalloc/internal/hash.h
+++ b/include/jemalloc/internal/hash.h
@@ -260,22 +260,22 @@ hash_x64_128(const void *key, const int len, const uint32_t seed,
uint64_t k2 = 0;
switch (len & 15) {
- case 15: k2 ^= ((uint64_t)(tail[14])) << 48;
- case 14: k2 ^= ((uint64_t)(tail[13])) << 40;
- case 13: k2 ^= ((uint64_t)(tail[12])) << 32;
- case 12: k2 ^= ((uint64_t)(tail[11])) << 24;
- case 11: k2 ^= ((uint64_t)(tail[10])) << 16;
- case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8;
+ case 15: k2 ^= ((uint64_t)(tail[14])) << 48; /* falls through */
+ case 14: k2 ^= ((uint64_t)(tail[13])) << 40; /* falls through */
+ case 13: k2 ^= ((uint64_t)(tail[12])) << 32; /* falls through */
+ case 12: k2 ^= ((uint64_t)(tail[11])) << 24; /* falls through */
+ case 11: k2 ^= ((uint64_t)(tail[10])) << 16; /* falls through */
+ case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; /* falls through */
case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0;
k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2;
-
- case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56;
- case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48;
- case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40;
- case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32;
- case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24;
- case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16;
- case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8;
+ /* falls through */
+ case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; /* falls through */
+ case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; /* falls through */
+ case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; /* falls through */
+ case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; /* falls through */
+ case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; /* falls through */
+ case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; /* falls through */
+ case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; /* falls through */
case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0;
k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1;
}
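These markers follow the /* falls through */ comment convention recognized by GCC's -Wimplicit-fallthrough (and similar checks), so the intentional fall-through in the tail-processing switch above no longer warns. A standalone illustration of the pattern (not jemalloc code):

/* Illustrative only: intentional fall-through, annotated for the compiler. */
static uint32_t
example_tail_mix(const uint8_t *tail, int len, uint32_t k) {
	switch (len & 3) {
	case 3: k ^= (uint32_t)tail[2] << 16;	/* falls through */
	case 2: k ^= (uint32_t)tail[1] << 8;	/* falls through */
	case 1: k ^= (uint32_t)tail[0];
	}
	return k;
}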
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index c0f834f..8dad9a1 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -33,6 +33,8 @@
* order to yield to another virtual CPU.
*/
#undef CPU_SPINWAIT
+/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
+#undef HAVE_CPU_SPINWAIT
/*
* Number of significant bits in virtual addresses. This may be less than the
@@ -238,6 +240,12 @@
#undef JEMALLOC_CACHE_OBLIVIOUS
/*
+ * If defined, enable logging facilities. We make this a configure option to
+ * avoid taking extra branches everywhere.
+ */
+#undef JEMALLOC_LOG
+
+/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
#undef JEMALLOC_ZONE
@@ -255,6 +263,12 @@
#undef JEMALLOC_HAVE_MADVISE
/*
+ * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
+ * arguments to madvise(2).
+ */
+#undef JEMALLOC_HAVE_MADVISE_HUGE
+
+/*
* Methods for purging unused pages differ between operating systems.
*
* madvise(..., MADV_FREE) : This marks pages as being unused, such that they
@@ -271,6 +285,14 @@
#undef JEMALLOC_PURGE_MADVISE_DONTNEED
#undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
+/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
+#undef JEMALLOC_DEFINE_MADVISE_FREE
+
+/*
+ * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
+ */
+#undef JEMALLOC_MADVISE_DONTDUMP
+
/*
* Defined if transparent huge pages (THPs) are supported via the
* MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
@@ -336,4 +358,9 @@
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
#undef JEMALLOC_IS_MALLOC
+/*
+ * Defined if strerror_r returns char * if _GNU_SOURCE is defined.
+ */
+#undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE
+
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h
index 24ea416..c6a1f7e 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h
@@ -106,16 +106,16 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) {
return &tdata->decay_ticker;
}
-JEMALLOC_ALWAYS_INLINE tcache_bin_t *
+JEMALLOC_ALWAYS_INLINE cache_bin_t *
tcache_small_bin_get(tcache_t *tcache, szind_t binind) {
assert(binind < NBINS);
- return &tcache->tbins_small[binind];
+ return &tcache->bins_small[binind];
}
-JEMALLOC_ALWAYS_INLINE tcache_bin_t *
+JEMALLOC_ALWAYS_INLINE cache_bin_t *
tcache_large_bin_get(tcache_t *tcache, szind_t binind) {
assert(binind >= NBINS && binind < nhbins);
- return &tcache->tbins_large[binind - NBINS];
+ return &tcache->bins_large[binind - NBINS];
}
JEMALLOC_ALWAYS_INLINE bool
@@ -151,6 +151,7 @@ pre_reentrancy(tsd_t *tsd, arena_t *arena) {
assert(arena != arena_get(tsd_tsdn(tsd), 0, false));
bool fast = tsd_fast(tsd);
+ assert(tsd_reentrancy_level_get(tsd) < INT8_MAX);
++*tsd_reentrancy_levelp_get(tsd);
if (fast) {
/* Prepare slow path for reentrancy. */
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
index 7ffce6f..c829ac6 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -5,6 +5,24 @@
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/witness.h"
+/*
+ * Translating the names of the 'i' functions:
+ * Abbreviations used in the first part of the function name (before
+ * alloc/dalloc) describe what that function accomplishes:
+ * a: arena (query)
+ * s: size (query, or sized deallocation)
+ * e: extent (query)
+ * p: aligned (allocates)
+ * vs: size (query, without knowing that the pointer is into the heap)
+ * r: rallocx implementation
+ * x: xallocx implementation
+ * Abbreviations used in the second part of the function name (after
+ * alloc/dalloc) describe the arguments it takes
+ * z: whether to return zeroed memory
+ * t: accepts a tcache_t * parameter
+ * m: accepts an arena_t * parameter
+ */
+
JEMALLOC_ALWAYS_INLINE arena_t *
iaalloc(tsdn_t *tsdn, const void *ptr) {
assert(ptr != NULL);
@@ -27,8 +45,10 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
assert(size != 0);
assert(!is_internal || tcache == NULL);
assert(!is_internal || arena == NULL || arena_is_auto(arena));
- witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
- WITNESS_RANK_CORE, 0);
+ if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+ }
ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path);
if (config_stats && is_internal && likely(ret != NULL)) {
@@ -91,7 +111,8 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx,
if (config_stats && is_internal) {
arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr));
}
- if (!is_internal && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) {
+ if (!is_internal && !tsdn_null(tsdn) &&
+ tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) {
assert(tcache == NULL);
}
arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path);
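A worked reading of one name against the legend above: iallocztm is the alloc entry point whose suffix says it takes a zero flag (z), a tcache_t * (t), and an arena_t * (m). A hypothetical call site (the wrapper and argument choices are illustrative):

/* Hypothetical: allocate 'size' bytes through the z/t/m variant. */
static void *
example_internal_alloc(tsdn_t *tsdn, size_t size) {
	szind_t ind = sz_size2index(size);
	return iallocztm(tsdn, size, ind, /* zero */ false, /* tcache */ NULL,
	    /* is_internal */ true, /* arena */ NULL, /* slow_path */ true);
}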
diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h
index 4571895..ed75d37 100644
--- a/include/jemalloc/internal/jemalloc_internal_macros.h
+++ b/include/jemalloc/internal/jemalloc_internal_macros.h
@@ -37,4 +37,7 @@
# define JET_MUTABLE const
#endif
+#define JEMALLOC_VA_ARGS_HEAD(head, ...) head
+#define JEMALLOC_VA_ARGS_TAIL(head, ...) __VA_ARGS__
+
#endif /* JEMALLOC_INTERNAL_MACROS_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h
index 50f9d00..1b750b1 100644
--- a/include/jemalloc/internal/jemalloc_internal_types.h
+++ b/include/jemalloc/internal/jemalloc_internal_types.h
@@ -79,22 +79,29 @@ typedef int malloc_cpuid_t;
# ifdef __hppa__
# define LG_QUANTUM 4
# endif
+# ifdef __m68k__
+# define LG_QUANTUM 3
+# endif
# ifdef __mips__
# define LG_QUANTUM 3
# endif
+# ifdef __nios2__
+# define LG_QUANTUM 3
+# endif
# ifdef __or1k__
# define LG_QUANTUM 3
# endif
# ifdef __powerpc__
# define LG_QUANTUM 4
# endif
-# ifdef __riscv__
+# if defined(__riscv) || defined(__riscv__)
# define LG_QUANTUM 4
# endif
# ifdef __s390__
# define LG_QUANTUM 4
# endif
-# ifdef __SH4__
+# if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \
+ defined(__SH4_SINGLE_ONLY__))
# define LG_QUANTUM 4
# endif
# ifdef __tile__
diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in
index 18539a0..f81f3a4 100644
--- a/include/jemalloc/internal/jemalloc_preamble.h.in
+++ b/include/jemalloc/internal/jemalloc_preamble.h.in
@@ -47,6 +47,10 @@
#endif
#include "jemalloc/internal/hooks.h"
+#ifdef JEMALLOC_DEFINE_MADVISE_FREE
+# define JEMALLOC_MADV_FREE 8
+#endif
+
static const bool config_debug =
#ifdef JEMALLOC_DEBUG
true
@@ -61,6 +65,13 @@ static const bool have_dss =
false
#endif
;
+static const bool have_madvise_huge =
+#ifdef JEMALLOC_HAVE_MADVISE_HUGE
+ true
+#else
+ false
+#endif
+ ;
static const bool config_fill =
#ifdef JEMALLOC_FILL
true
@@ -146,6 +157,17 @@ static const bool config_cache_oblivious =
false
#endif
;
+/*
+ * Undocumented, for jemalloc development use only at the moment. See the note
+ * in jemalloc/internal/log.h.
+ */
+static const bool config_log =
+#ifdef JEMALLOC_LOG
+ true
+#else
+ false
+#endif
+ ;
#ifdef JEMALLOC_HAVE_SCHED_GETCPU
/* Currently percpu_arena depends on sched_getcpu. */
#define JEMALLOC_PERCPU_ARENA
diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h
new file mode 100644
index 0000000..6420858
--- /dev/null
+++ b/include/jemalloc/internal/log.h
@@ -0,0 +1,115 @@
+#ifndef JEMALLOC_INTERNAL_LOG_H
+#define JEMALLOC_INTERNAL_LOG_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/mutex.h"
+
+#ifdef JEMALLOC_LOG
+# define JEMALLOC_LOG_VAR_BUFSIZE 1000
+#else
+# define JEMALLOC_LOG_VAR_BUFSIZE 1
+#endif
+
+#define JEMALLOC_LOG_BUFSIZE 4096
+
+/*
+ * The log malloc_conf option is a '|'-delimited list of log_var name segments
+ * which should be logged. The names are themselves hierarchical, with '.' as
+ * the delimiter (a "segment" is just a prefix in the log namespace). So, if
+ * you have:
+ *
+ * log("arena", "log msg for arena"); // 1
+ * log("arena.a", "log msg for arena.a"); // 2
+ * log("arena.b", "log msg for arena.b"); // 3
+ * log("arena.a.a", "log msg for arena.a.a"); // 4
+ * log("extent.a", "log msg for extent.a"); // 5
+ * log("extent.b", "log msg for extent.b"); // 6
+ *
+ * And your malloc_conf option is "log=arena.a|extent", then lines 2, 4, 5, and
+ * 6 will print at runtime. You can enable logging from all log vars by
+ * writing "log=.".
+ *
+ * None of this should be regarded as a stable API right now. It's intended
+ * as a debugging interface, to let us keep around some of our printf-debugging
+ * statements.
+ */
+
+extern char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE];
+extern atomic_b_t log_init_done;
+
+typedef struct log_var_s log_var_t;
+struct log_var_s {
+ /*
+ * Lowest bit is "inited", second lowest is "enabled". Putting them in
+ * a single word lets us avoid any fences on weak architectures.
+ */
+ atomic_u_t state;
+ const char *name;
+};
+
+#define LOG_NOT_INITIALIZED 0U
+#define LOG_INITIALIZED_NOT_ENABLED 1U
+#define LOG_ENABLED 2U
+
+#define LOG_VAR_INIT(name_str) {ATOMIC_INIT(LOG_NOT_INITIALIZED), name_str}
+
+/*
+ * Returns the value we should assume for state (which is not necessarily
+ * accurate; if logging is done before logging has finished initializing, then
+ * we default to doing the safe thing by logging everything).
+ */
+unsigned log_var_update_state(log_var_t *log_var);
+
+/* We factor out the metadata management to allow us to test more easily. */
+#define log_do_begin(log_var) \
+if (config_log) { \
+ unsigned log_state = atomic_load_u(&(log_var).state, \
+ ATOMIC_RELAXED); \
+ if (unlikely(log_state == LOG_NOT_INITIALIZED)) { \
+ log_state = log_var_update_state(&(log_var)); \
+ assert(log_state != LOG_NOT_INITIALIZED); \
+ } \
+ if (log_state == LOG_ENABLED) { \
+ {
+ /* User code executes here. */
+#define log_do_end(log_var) \
+ } \
+ } \
+}
+
+/*
+ * MSVC has some preprocessor bugs in its expansion of __VA_ARGS__ during
+ * preprocessing. To work around this, we take all potential extra arguments in
+ * a varargs function. Since a varargs macro needs at least one argument in
+ * the "...", we accept the format string there, and require that the first
+ * argument in this "..." is a const char *.
+ */
+static inline void
+log_impl_varargs(const char *name, ...) {
+ char buf[JEMALLOC_LOG_BUFSIZE];
+ va_list ap;
+
+ va_start(ap, name);
+ const char *format = va_arg(ap, const char *);
+ size_t dst_offset = 0;
+ dst_offset += malloc_snprintf(buf, JEMALLOC_LOG_BUFSIZE, "%s: ", name);
+ dst_offset += malloc_vsnprintf(buf + dst_offset,
+ JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap);
+ dst_offset += malloc_snprintf(buf + dst_offset,
+ JEMALLOC_LOG_BUFSIZE - dst_offset, "\n");
+ va_end(ap);
+
+ malloc_write(buf);
+}
+
+/* Call as log("log.var.str", "format_string %d", arg_for_format_string); */
+#define LOG(log_var_str, ...) \
+do { \
+ static log_var_t log_var = LOG_VAR_INIT(log_var_str); \
+ log_do_begin(log_var) \
+ log_impl_varargs((log_var).name, __VA_ARGS__); \
+ log_do_end(log_var) \
+} while (0)
+
+#endif /* JEMALLOC_INTERNAL_LOG_H */
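A usage sketch tying the pieces together: a LOG() call site, and the malloc_conf value that enables it at runtime per the namespace rules described above (the log variable name and message are illustrative):

/* Hypothetical call site: prints when run with MALLOC_CONF="log=extent.split"
 * (or "log=extent", or "log=." to enable everything). */
static void
example_log_split(size_t size) {
	LOG("extent.split", "splitting extent of size %zu", size);
}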
diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h
index 47ae58e..4992d1d 100644
--- a/include/jemalloc/internal/malloc_io.h
+++ b/include/jemalloc/internal/malloc_io.h
@@ -53,6 +53,10 @@ size_t malloc_vsnprintf(char *str, size_t size, const char *format,
va_list ap);
size_t malloc_snprintf(char *str, size_t size, const char *format, ...)
JEMALLOC_FORMAT_PRINTF(3, 4);
+/*
+ * The caller can pass NULL for write_cb and cbopaque to print via the
+ * je_malloc_message hook.
+ */
void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
const char *format, va_list ap);
void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h
index 3358bcf..735c0ad 100644
--- a/include/jemalloc/internal/mutex_prof.h
+++ b/include/jemalloc/internal/mutex_prof.h
@@ -35,21 +35,34 @@ typedef enum {
mutex_prof_num_arena_mutexes
} mutex_prof_arena_ind_t;
-#define MUTEX_PROF_COUNTERS \
+#define MUTEX_PROF_UINT64_COUNTERS \
OP(num_ops, uint64_t) \
OP(num_wait, uint64_t) \
- OP(num_spin_acq, uint64_t) \
- OP(num_owner_switch, uint64_t) \
- OP(total_wait_time, uint64_t) \
- OP(max_wait_time, uint64_t) \
+ OP(num_spin_acq, uint64_t) \
+ OP(num_owner_switch, uint64_t) \
+ OP(total_wait_time, uint64_t) \
+ OP(max_wait_time, uint64_t)
+
+#define MUTEX_PROF_UINT32_COUNTERS \
OP(max_num_thds, uint32_t)
-typedef enum {
+#define MUTEX_PROF_COUNTERS \
+ MUTEX_PROF_UINT64_COUNTERS \
+ MUTEX_PROF_UINT32_COUNTERS
+
#define OP(counter, type) mutex_counter_##counter,
- MUTEX_PROF_COUNTERS
+
+#define COUNTER_ENUM(counter_list, t) \
+ typedef enum { \
+ counter_list \
+ mutex_prof_num_##t##_counters \
+ } mutex_prof_##t##_counter_ind_t;
+
+COUNTER_ENUM(MUTEX_PROF_UINT64_COUNTERS, uint64_t)
+COUNTER_ENUM(MUTEX_PROF_UINT32_COUNTERS, uint32_t)
+
+#undef COUNTER_ENUM
#undef OP
- mutex_prof_num_counters
-} mutex_prof_counter_ind_t;
typedef struct {
/*
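The hunk above splits the counter list by integer width so that a single generator macro can emit one enum per width, letting other code iterate each group uniformly. A standalone sketch of the same X-macro pattern; the DEMO_* and demo_* names are illustrative, not the jemalloc ones.

#include <stdint.h>
#include <stdio.h>

#define DEMO_U64_COUNTERS			\
	OP(num_ops, uint64_t)			\
	OP(num_wait, uint64_t)

#define DEMO_U32_COUNTERS			\
	OP(max_num_thds, uint32_t)

#define OP(counter, type) demo_counter_##counter,
#define COUNTER_ENUM(counter_list, t)		\
	typedef enum {				\
		counter_list			\
		demo_num_##t##_counters		\
	} demo_##t##_counter_ind_t;

COUNTER_ENUM(DEMO_U64_COUNTERS, uint64_t)
COUNTER_ENUM(DEMO_U32_COUNTERS, uint32_t)
#undef COUNTER_ENUM
#undef OP

int
main(void) {
	/* Each generated enum ends with its own count, usable as an array bound. */
	uint64_t u64_vals[demo_num_uint64_t_counters] = {0};
	uint32_t u32_vals[demo_num_uint32_t_counters] = {0};
	printf("%zu uint64_t counters, %zu uint32_t counters\n",
	    sizeof(u64_vals) / sizeof(u64_vals[0]),
	    sizeof(u32_vals) / sizeof(u32_vals[0]));
	return 0;
}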
diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h
index 28383b7..dff2051 100644
--- a/include/jemalloc/internal/pages.h
+++ b/include/jemalloc/internal/pages.h
@@ -58,6 +58,9 @@ static const bool pages_can_purge_forced =
#endif
;
+/* Whether transparent huge page state is "madvise". */
+extern bool thp_state_madvise;
+
void *pages_map(void *addr, size_t size, size_t alignment, bool *commit);
void pages_unmap(void *addr, size_t size);
bool pages_commit(void *addr, size_t size);
@@ -66,6 +69,8 @@ bool pages_purge_lazy(void *addr, size_t size);
bool pages_purge_forced(void *addr, size_t size);
bool pages_huge(void *addr, size_t size);
bool pages_nohuge(void *addr, size_t size);
+bool pages_dontdump(void *addr, size_t size);
+bool pages_dodump(void *addr, size_t size);
bool pages_boot(void);
#endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */
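pages_dontdump() and pages_dodump(), declared above, let the allocator exclude a mapping from core dumps and re-include it. A minimal standalone sketch of what such wrappers commonly look like on Linux; the madvise(MADV_DONTDUMP)/madvise(MADV_DODUMP) calls and the return-true-on-error convention are assumptions here, since the implementation (src/pages.c) is not part of this include/ diff, and the demo_* names are illustrative.

#define _DEFAULT_SOURCE	/* For madvise() and MAP_ANONYMOUS on glibc. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

static bool
demo_pages_dontdump(void *addr, size_t size) {
#ifdef MADV_DONTDUMP
	return madvise(addr, size, MADV_DONTDUMP) != 0;
#else
	(void)addr;
	(void)size;
	return false;	/* Nothing to do without the advice flag. */
#endif
}

static bool
demo_pages_dodump(void *addr, size_t size) {
#ifdef MADV_DODUMP
	return madvise(addr, size, MADV_DODUMP) != 0;
#else
	(void)addr;
	(void)size;
	return false;
#endif
}

int
main(void) {
	size_t sz = (size_t)sysconf(_SC_PAGESIZE);
	void *p = mmap(NULL, sz, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		return 1;
	}
	printf("dontdump error=%d, dodump error=%d\n",
	    (int)demo_pages_dontdump(p, sz), (int)demo_pages_dodump(p, sz));
	munmap(p, sz);
	return 0;
}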
diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h
index b5d4db3..4563db2 100644
--- a/include/jemalloc/internal/rtree.h
+++ b/include/jemalloc/internal/rtree.h
@@ -178,9 +178,21 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
JEMALLOC_ALWAYS_INLINE extent_t *
rtree_leaf_elm_bits_extent_get(uintptr_t bits) {
+# ifdef __aarch64__
+ /*
+	 * aarch64 doesn't sign-extend the highest virtual address bit into
+	 * the higher ones. Instead, the high bits get zeroed.
+ */
+ uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1;
+ /* Mask off the slab bit. */
+ uintptr_t low_bit_mask = ~(uintptr_t)1;
+ uintptr_t mask = high_bit_mask & low_bit_mask;
+ return (extent_t *)(bits & mask);
+# else
/* Restore sign-extended high bits, mask slab bit. */
return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >>
RTREE_NHIB) & ~((uintptr_t)0x1));
+# endif
}
JEMALLOC_ALWAYS_INLINE szind_t
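The aarch64 branch above recovers the extent pointer by masking to the low LG_VADDR bits instead of sign-extending, since aarch64 zero-fills the unused high bits. A standalone sketch of both recovery paths; DEMO_LG_VADDR = 48 and the pointer value are illustrative, the real constants come from the build configuration.

#include <assert.h>
#include <stdint.h>

#define DEMO_LG_VADDR	48
#define DEMO_NHIB	(64 - DEMO_LG_VADDR)	/* # of unused high bits. */

/* x86-64-style: restore sign-extended high bits, mask off the slab bit. */
static uint64_t
demo_extract_sign_extend(uint64_t bits) {
	return (uint64_t)((int64_t)(bits << DEMO_NHIB) >> DEMO_NHIB) &
	    ~(uint64_t)0x1;
}

/* aarch64-style: high bits were zero to begin with, so just mask them off. */
static uint64_t
demo_extract_zero_extend(uint64_t bits) {
	uint64_t high_bit_mask = ((uint64_t)1 << DEMO_LG_VADDR) - 1;
	uint64_t low_bit_mask = ~(uint64_t)1;	/* Drop the slab bit. */
	return bits & high_bit_mask & low_bit_mask;
}

int
main(void) {
	/* A user-space pointer value with the slab bit and metadata packed in. */
	uint64_t ptr = 0x00007f1234567890ULL;
	uint64_t packed = ptr | 0x1 | ((uint64_t)0xff << DEMO_LG_VADDR);

	assert(demo_extract_zero_extend(packed) == ptr);
	/* Sign extension also recovers it here because bit 47 of ptr is 0. */
	assert(demo_extract_sign_extend(packed) == ptr);
	return 0;
}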
diff --git a/include/jemalloc/internal/rtree_tsd.h b/include/jemalloc/internal/rtree_tsd.h
index 3cdc862..93a7517 100644
--- a/include/jemalloc/internal/rtree_tsd.h
+++ b/include/jemalloc/internal/rtree_tsd.h
@@ -26,7 +26,7 @@
* Zero initializer required for tsd initialization only. Proper initialization
* done via rtree_ctx_data_init().
*/
-#define RTREE_CTX_ZERO_INITIALIZER {{{0}}}
+#define RTREE_CTX_ZERO_INITIALIZER {{{0}}, {{0}}}
typedef struct rtree_leaf_elm_s rtree_leaf_elm_t;
diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h
index e2afc98..22804c6 100644
--- a/include/jemalloc/internal/spin.h
+++ b/include/jemalloc/internal/spin.h
@@ -1,25 +1,29 @@
#ifndef JEMALLOC_INTERNAL_SPIN_H
#define JEMALLOC_INTERNAL_SPIN_H
-#ifdef JEMALLOC_SPIN_C_
-# define SPIN_INLINE extern inline
-#else
-# define SPIN_INLINE inline
-#endif
-
#define SPIN_INITIALIZER {0U}
typedef struct {
unsigned iteration;
} spin_t;
-SPIN_INLINE void
+static inline void
+spin_cpu_spinwait() {
+# if HAVE_CPU_SPINWAIT
+ CPU_SPINWAIT;
+# else
+ volatile int x = 0;
+ x = x;
+# endif
+}
+
+static inline void
spin_adaptive(spin_t *spin) {
volatile uint32_t i;
if (spin->iteration < 5) {
for (i = 0; i < (1U << spin->iteration); i++) {
- CPU_SPINWAIT;
+ spin_cpu_spinwait();
}
spin->iteration++;
} else {
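spin_cpu_spinwait() above degrades to a volatile no-op when CPU_SPINWAIT is unavailable, and spin_adaptive() doubles the number of pauses for each of the first five iterations. A standalone sketch of how such an adaptive spin is typically used to wait on a flag; the demo_* names are illustrative, and the sched_yield() long-wait fallback is an assumption, since the else branch is truncated by this hunk.

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
	unsigned iteration;
} demo_spin_t;

static inline void
demo_cpu_spinwait(void) {
#if defined(__x86_64__) || defined(__i386__)
	__asm__ __volatile__("pause");
#else
	/* Portable no-op fallback, mirroring the header's else branch. */
	volatile int x = 0;
	x = x;
#endif
}

static inline void
demo_spin_adaptive(demo_spin_t *spin) {
	volatile uint32_t i;

	if (spin->iteration < 5) {
		/* Exponential backoff: 1, 2, 4, 8, 16 pauses. */
		for (i = 0; i < (1U << spin->iteration); i++) {
			demo_cpu_spinwait();
		}
		spin->iteration++;
	} else {
		sched_yield();	/* Assumed long-wait fallback. */
	}
}

static atomic_int done;

static void *
setter(void *arg) {
	(void)arg;
	atomic_store_explicit(&done, 1, memory_order_release);
	return NULL;
}

int
main(void) {
	pthread_t t;
	demo_spin_t spin = {0};

	pthread_create(&t, NULL, setter, NULL);
	while (atomic_load_explicit(&done, memory_order_acquire) == 0) {
		demo_spin_adaptive(&spin);
	}
	pthread_join(t, NULL);
	puts("flag observed");
	return 0;
}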
diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h
index 1198779..852e342 100644
--- a/include/jemalloc/internal/stats.h
+++ b/include/jemalloc/internal/stats.h
@@ -1,12 +1,6 @@
#ifndef JEMALLOC_INTERNAL_STATS_H
#define JEMALLOC_INTERNAL_STATS_H
-#include "jemalloc/internal/atomic.h"
-#include "jemalloc/internal/mutex_prof.h"
-#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/size_classes.h"
-#include "jemalloc/internal/stats_tsd.h"
-
/* OPTION(opt, var_name, default, set_value_to) */
#define STATS_PRINT_OPTIONS \
OPTION('J', json, false, true) \
@@ -33,132 +27,4 @@ extern char opt_stats_print_opts[stats_print_tot_num_options+1];
void stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
const char *opts);
-/*
- * In those architectures that support 64-bit atomics, we use atomic updates for
- * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize
- * externally.
- */
-#ifdef JEMALLOC_ATOMIC_U64
-typedef atomic_u64_t arena_stats_u64_t;
-#else
-/* Must hold the arena stats mutex while reading atomically. */
-typedef uint64_t arena_stats_u64_t;
-#endif
-
-typedef struct malloc_bin_stats_s {
- /*
- * Total number of allocation/deallocation requests served directly by
- * the bin. Note that tcache may allocate an object, then recycle it
- * many times, resulting many increments to nrequests, but only one
- * each to nmalloc and ndalloc.
- */
- uint64_t nmalloc;
- uint64_t ndalloc;
-
- /*
- * Number of allocation requests that correspond to the size of this
- * bin. This includes requests served by tcache, though tcache only
- * periodically merges into this counter.
- */
- uint64_t nrequests;
-
- /*
- * Current number of regions of this size class, including regions
- * currently cached by tcache.
- */
- size_t curregs;
-
- /* Number of tcache fills from this bin. */
- uint64_t nfills;
-
- /* Number of tcache flushes to this bin. */
- uint64_t nflushes;
-
- /* Total number of slabs created for this bin's size class. */
- uint64_t nslabs;
-
- /*
- * Total number of slabs reused by extracting them from the slabs heap
- * for this bin's size class.
- */
- uint64_t reslabs;
-
- /* Current number of slabs in this bin. */
- size_t curslabs;
-
- mutex_prof_data_t mutex_data;
-} malloc_bin_stats_t;
-
-typedef struct malloc_large_stats_s {
- /*
- * Total number of allocation/deallocation requests served directly by
- * the arena.
- */
- arena_stats_u64_t nmalloc;
- arena_stats_u64_t ndalloc;
-
- /*
- * Number of allocation requests that correspond to this size class.
- * This includes requests served by tcache, though tcache only
- * periodically merges into this counter.
- */
- arena_stats_u64_t nrequests; /* Partially derived. */
-
- /* Current number of allocations of this size class. */
- size_t curlextents; /* Derived. */
-} malloc_large_stats_t;
-
-typedef struct decay_stats_s {
- /* Total number of purge sweeps. */
- arena_stats_u64_t npurge;
- /* Total number of madvise calls made. */
- arena_stats_u64_t nmadvise;
- /* Total number of pages purged. */
- arena_stats_u64_t purged;
-} decay_stats_t;
-
-/*
- * Arena stats. Note that fields marked "derived" are not directly maintained
- * within the arena code; rather their values are derived during stats merge
- * requests.
- */
-typedef struct arena_stats_s {
-#ifndef JEMALLOC_ATOMIC_U64
- malloc_mutex_t mtx;
-#endif
-
- /* Number of bytes currently mapped, excluding retained memory. */
- atomic_zu_t mapped; /* Partially derived. */
-
- /*
- * Number of unused virtual memory bytes currently retained. Retained
- * bytes are technically mapped (though always decommitted or purged),
- * but they are excluded from the mapped statistic (above).
- */
- atomic_zu_t retained; /* Derived. */
-
- decay_stats_t decay_dirty;
- decay_stats_t decay_muzzy;
-
- atomic_zu_t base; /* Derived. */
- atomic_zu_t internal;
- atomic_zu_t resident; /* Derived. */
-
- atomic_zu_t allocated_large; /* Derived. */
- arena_stats_u64_t nmalloc_large; /* Derived. */
- arena_stats_u64_t ndalloc_large; /* Derived. */
- arena_stats_u64_t nrequests_large; /* Derived. */
-
- /* Number of bytes cached in tcache associated with this arena. */
- atomic_zu_t tcache_bytes; /* Derived. */
-
- mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes];
-
- /* One element for each large size class. */
- malloc_large_stats_t lstats[NSIZES - NBINS];
-
- /* Arena uptime. */
- nstime_t uptime;
-} arena_stats_t;
-
#endif /* JEMALLOC_INTERNAL_STATS_H */
diff --git a/include/jemalloc/internal/stats_tsd.h b/include/jemalloc/internal/stats_tsd.h
deleted file mode 100644
index d0c3bbe..0000000
--- a/include/jemalloc/internal/stats_tsd.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_STATS_TSD_H
-#define JEMALLOC_INTERNAL_STATS_TSD_H
-
-typedef struct tcache_bin_stats_s {
- /*
- * Number of allocation requests that corresponded to the size of this
- * bin.
- */
- uint64_t nrequests;
-} tcache_bin_stats_t;
-
-#endif /* JEMALLOC_INTERNAL_STATS_TSD_H */
diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h
index 7f640d5..9794628 100644
--- a/include/jemalloc/internal/sz.h
+++ b/include/jemalloc/internal/sz.h
@@ -61,7 +61,7 @@ sz_psz2ind(size_t psz) {
pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
- size_t delta_inverse_mask = ZD(-1) << lg_delta;
+ size_t delta_inverse_mask = ZU(-1) << lg_delta;
pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) &
((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
@@ -142,7 +142,7 @@ sz_size2index_compute(size_t size) {
szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
- size_t delta_inverse_mask = ZD(-1) << lg_delta;
+ size_t delta_inverse_mask = ZU(-1) << lg_delta;
szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) &
((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
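The ZD(-1) -> ZU(-1) changes above build the all-ones pattern as an unsigned size_t before shifting (ZU()/ZD() are jemalloc's size_t/ssize_t cast helpers). Left-shifting a negative signed value is undefined behavior in C, so the unsigned form is the well-defined way to compute delta_inverse_mask. A tiny standalone illustration, with an arbitrary lg_delta:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define ZU(z)	((size_t)(z))

int
main(void) {
	unsigned lg_delta = 4;	/* Illustrative: 16-byte spacing within a group. */

	/* All ones, then the low lg_delta bits cleared: ...1111110000. */
	size_t delta_inverse_mask = ZU(-1) << lg_delta;

	size_t size = 50;
	size_t rounded_down = size & delta_inverse_mask;

	assert(rounded_down == 48);
	printf("mask=%#zx, %zu rounds down to %zu\n",
	    delta_inverse_mask, size, rounded_down);
	return 0;
}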
diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h
index db3e9c7..790367b 100644
--- a/include/jemalloc/internal/tcache_externs.h
+++ b/include/jemalloc/internal/tcache_externs.h
@@ -6,7 +6,7 @@
extern bool opt_tcache;
extern ssize_t opt_lg_tcache_max;
-extern tcache_bin_info_t *tcache_bin_info;
+extern cache_bin_info_t *tcache_bin_info;
/*
* Number of tcache bins. There are NBINS small-object bins, plus 0 or more
@@ -30,10 +30,10 @@ extern tcaches_t *tcaches;
size_t tcache_salloc(tsdn_t *tsdn, const void *ptr);
void tcache_event_hard(tsd_t *tsd, tcache_t *tcache);
void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
- tcache_bin_t *tbin, szind_t binind, bool *tcache_success);
-void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
+ cache_bin_t *tbin, szind_t binind, bool *tcache_success);
+void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
szind_t binind, unsigned rem);
-void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
+void tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind,
unsigned rem, tcache_t *tcache);
void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache,
arena_t *arena);
diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h
index c55bcd2..0a6feb5 100644
--- a/include/jemalloc/internal/tcache_inlines.h
+++ b/include/jemalloc/internal/tcache_inlines.h
@@ -1,6 +1,7 @@
#ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H
#define JEMALLOC_INTERNAL_TCACHE_INLINES_H
+#include "jemalloc/internal/bin.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/sz.h"
@@ -38,43 +39,16 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) {
}
JEMALLOC_ALWAYS_INLINE void *
-tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) {
- void *ret;
-
- if (unlikely(tbin->ncached == 0)) {
- tbin->low_water = -1;
- *tcache_success = false;
- return NULL;
- }
- /*
- * tcache_success (instead of ret) should be checked upon the return of
- * this function. We avoid checking (ret == NULL) because there is
- * never a null stored on the avail stack (which is unknown to the
- * compiler), and eagerly checking ret would cause pipeline stall
- * (waiting for the cacheline).
- */
- *tcache_success = true;
- ret = *(tbin->avail - tbin->ncached);
- tbin->ncached--;
-
- if (unlikely((low_water_t)tbin->ncached < tbin->low_water)) {
- tbin->low_water = tbin->ncached;
- }
-
- return ret;
-}
-
-JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
szind_t binind, bool zero, bool slow_path) {
void *ret;
- tcache_bin_t *tbin;
+ cache_bin_t *bin;
bool tcache_success;
size_t usize JEMALLOC_CC_SILENCE_INIT(0);
assert(binind < NBINS);
- tbin = tcache_small_bin_get(tcache, binind);
- ret = tcache_alloc_easy(tbin, &tcache_success);
+ bin = tcache_small_bin_get(tcache, binind);
+ ret = cache_bin_alloc_easy(bin, &tcache_success);
assert(tcache_success == (ret != NULL));
if (unlikely(!tcache_success)) {
bool tcache_hard_success;
@@ -84,7 +58,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
}
ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache,
- tbin, binind, &tcache_hard_success);
+ bin, binind, &tcache_hard_success);
if (tcache_hard_success == false) {
return NULL;
}
@@ -103,22 +77,21 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
if (likely(!zero)) {
if (slow_path && config_fill) {
if (unlikely(opt_junk_alloc)) {
- arena_alloc_junk_small(ret,
- &arena_bin_info[binind], false);
+ arena_alloc_junk_small(ret, &bin_infos[binind],
+ false);
} else if (unlikely(opt_zero)) {
memset(ret, 0, usize);
}
}
} else {
if (slow_path && config_fill && unlikely(opt_junk_alloc)) {
- arena_alloc_junk_small(ret, &arena_bin_info[binind],
- true);
+ arena_alloc_junk_small(ret, &bin_infos[binind], true);
}
memset(ret, 0, usize);
}
if (config_stats) {
- tbin->tstats.nrequests++;
+ bin->tstats.nrequests++;
}
if (config_prof) {
tcache->prof_accumbytes += usize;
@@ -131,12 +104,12 @@ JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
szind_t binind, bool zero, bool slow_path) {
void *ret;
- tcache_bin_t *tbin;
+ cache_bin_t *bin;
bool tcache_success;
	assert(binind >= NBINS && binind < nhbins);
- tbin = tcache_large_bin_get(tcache, binind);
- ret = tcache_alloc_easy(tbin, &tcache_success);
+ bin = tcache_large_bin_get(tcache, binind);
+ ret = cache_bin_alloc_easy(bin, &tcache_success);
assert(tcache_success == (ret != NULL));
if (unlikely(!tcache_success)) {
/*
@@ -176,7 +149,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
}
if (config_stats) {
- tbin->tstats.nrequests++;
+ bin->tstats.nrequests++;
}
if (config_prof) {
tcache->prof_accumbytes += usize;
@@ -190,24 +163,24 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
bool slow_path) {
- tcache_bin_t *tbin;
- tcache_bin_info_t *tbin_info;
+ cache_bin_t *bin;
+ cache_bin_info_t *bin_info;
assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS);
if (slow_path && config_fill && unlikely(opt_junk_free)) {
- arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
+ arena_dalloc_junk_small(ptr, &bin_infos[binind]);
}
- tbin = tcache_small_bin_get(tcache, binind);
- tbin_info = &tcache_bin_info[binind];
- if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
- tcache_bin_flush_small(tsd, tcache, tbin, binind,
- (tbin_info->ncached_max >> 1));
+ bin = tcache_small_bin_get(tcache, binind);
+ bin_info = &tcache_bin_info[binind];
+ if (unlikely(bin->ncached == bin_info->ncached_max)) {
+ tcache_bin_flush_small(tsd, tcache, bin, binind,
+ (bin_info->ncached_max >> 1));
}
- assert(tbin->ncached < tbin_info->ncached_max);
- tbin->ncached++;
- *(tbin->avail - tbin->ncached) = ptr;
+ assert(bin->ncached < bin_info->ncached_max);
+ bin->ncached++;
+ *(bin->avail - bin->ncached) = ptr;
tcache_event(tsd, tcache);
}
@@ -215,8 +188,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
bool slow_path) {
- tcache_bin_t *tbin;
- tcache_bin_info_t *tbin_info;
+ cache_bin_t *bin;
+ cache_bin_info_t *bin_info;
assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS);
assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass);
@@ -225,15 +198,15 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
large_dalloc_junk(ptr, sz_index2size(binind));
}
- tbin = tcache_large_bin_get(tcache, binind);
- tbin_info = &tcache_bin_info[binind];
- if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
- tcache_bin_flush_large(tsd, tbin, binind,
- (tbin_info->ncached_max >> 1), tcache);
+ bin = tcache_large_bin_get(tcache, binind);
+ bin_info = &tcache_bin_info[binind];
+ if (unlikely(bin->ncached == bin_info->ncached_max)) {
+ tcache_bin_flush_large(tsd, bin, binind,
+ (bin_info->ncached_max >> 1), tcache);
}
- assert(tbin->ncached < tbin_info->ncached_max);
- tbin->ncached++;
- *(tbin->avail - tbin->ncached) = ptr;
+ assert(bin->ncached < bin_info->ncached_max);
+ bin->ncached++;
+ *(bin->avail - bin->ncached) = ptr;
tcache_event(tsd, tcache);
}
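The fast paths above now call cache_bin_alloc_easy() from cache_bin.h in place of the removed tcache_alloc_easy(). Based on the removed code in this hunk, a standalone sketch of the pointer-stack layout involved: avail points just past the cached slots, avail[-ncached .. -1] hold the cached objects with the lowest-addressed one allocated first, and low_water tracks the minimum fill since the last GC (-1 once the bin runs empty). The demo_* names are illustrative, not the cache_bin.h definitions.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
	int32_t low_water;	/* Min # cached since last GC. */
	uint32_t ncached;	/* # of cached objects. */
	void **avail;		/* Points just above the cached slots. */
} demo_cache_bin_t;

static void *
demo_cache_bin_alloc_easy(demo_cache_bin_t *bin, bool *success) {
	if (bin->ncached == 0) {
		bin->low_water = -1;
		*success = false;
		return NULL;
	}
	*success = true;
	void *ret = *(bin->avail - bin->ncached);	/* Lowest item first. */
	bin->ncached--;
	if ((int32_t)bin->ncached < bin->low_water) {
		bin->low_water = (int32_t)bin->ncached;
	}
	return ret;
}

int
main(void) {
	int a, b;
	void *slots[8];
	demo_cache_bin_t bin = {2, 2, slots + 8};

	/* Fill the two topmost slots the way the dalloc paths above do. */
	slots[6] = &a;	/* avail[-2]: allocated first. */
	slots[7] = &b;	/* avail[-1] */

	bool ok;
	assert(demo_cache_bin_alloc_easy(&bin, &ok) == &a && ok);
	assert(demo_cache_bin_alloc_easy(&bin, &ok) == &b && ok);
	assert(demo_cache_bin_alloc_easy(&bin, &ok) == NULL && !ok);
	printf("low_water=%d ncached=%u\n", (int)bin.low_water,
	    (unsigned)bin.ncached);
	return 0;
}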
diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h
index 7eb516f..07b7387 100644
--- a/include/jemalloc/internal/tcache_structs.h
+++ b/include/jemalloc/internal/tcache_structs.h
@@ -3,54 +3,51 @@
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/size_classes.h"
-#include "jemalloc/internal/stats_tsd.h"
+#include "jemalloc/internal/cache_bin.h"
#include "jemalloc/internal/ticker.h"
-/*
- * Read-only information associated with each element of tcache_t's tbins array
- * is stored separately, mainly to reduce memory usage.
- */
-struct tcache_bin_info_s {
- unsigned ncached_max; /* Upper limit on ncached. */
-};
-
-struct tcache_bin_s {
- low_water_t low_water; /* Min # cached since last GC. */
- uint32_t ncached; /* # of cached objects. */
+struct tcache_s {
/*
- * ncached and stats are both modified frequently. Let's keep them
- * close so that they have a higher chance of being on the same
- * cacheline, thus less write-backs.
+ * To minimize our cache-footprint, we put the frequently accessed data
+ * together at the start of this struct.
*/
- tcache_bin_stats_t tstats;
+
+ /* Cleared after arena_prof_accum(). */
+ uint64_t prof_accumbytes;
+ /* Drives incremental GC. */
+ ticker_t gc_ticker;
/*
- * To make use of adjacent cacheline prefetch, the items in the avail
- * stack goes to higher address for newer allocations. avail points
- * just above the available space, which means that
- * avail[-ncached, ... -1] are available items and the lowest item will
- * be allocated first.
+ * The pointer stacks associated with bins follow as a contiguous array.
+	 * During tcache initialization, the avail pointer in each element of
+	 * the bins is initialized to point to the proper offset within this
+	 * array.
*/
- void **avail; /* Stack of available objects. */
-};
+ cache_bin_t bins_small[NBINS];
-struct tcache_s {
- /* Data accessed frequently first: prof, ticker and small bins. */
- uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */
- ticker_t gc_ticker; /* Drives incremental GC. */
/*
- * The pointer stacks associated with tbins follow as a contiguous
- * array. During tcache initialization, the avail pointer in each
- * element of tbins is initialized to point to the proper offset within
- * this array.
+ * This data is less hot; we can be a little less careful with our
+ * footprint here.
*/
- tcache_bin_t tbins_small[NBINS];
- /* Data accessed less often below. */
- ql_elm(tcache_t) link; /* Used for aggregating stats. */
- arena_t *arena; /* Associated arena. */
- szind_t next_gc_bin; /* Next bin to GC. */
+ /* Lets us track all the tcaches in an arena. */
+ ql_elm(tcache_t) link;
+ /*
+ * The descriptor lets the arena find our cache bins without seeing the
+ * tcache definition. This enables arenas to aggregate stats across
+ * tcaches without having a tcache dependency.
+ */
+ cache_bin_array_descriptor_t cache_bin_array_descriptor;
+
+ /* The arena this tcache is associated with. */
+ arena_t *arena;
+ /* Next bin to GC. */
+ szind_t next_gc_bin;
/* For small bins, fill (ncached_max >> lg_fill_div). */
uint8_t lg_fill_div[NBINS];
- tcache_bin_t tbins_large[NSIZES-NBINS];
+ /*
+	 * We put the cache bins for large size classes at the end of the
+	 * struct, since some of them might not get used. This may let us
+	 * avoid touching an extra page of the struct entirely.
+ */
+ cache_bin_t bins_large[NSIZES-NBINS];
};
/* Linkage for list of available (previously used) explicit tcache IDs. */
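The cache_bin_array_descriptor_t member above is what lets arena code reach bins_small and bins_large without seeing the tcache definition. A standalone sketch of that descriptor pattern with illustrative demo_* types; the real descriptor is declared in cache_bin.h, which is not shown in this section.

#include <stdint.h>
#include <stdio.h>

#define DEMO_NBINS_SMALL 4
#define DEMO_NBINS_LARGE 2

typedef struct {
	uint64_t nrequests;
} demo_bin_t;

/* Shared by owner and consumer; this is all the consumer needs to see. */
typedef struct {
	demo_bin_t *bins_small;
	demo_bin_t *bins_large;
} demo_bin_array_descriptor_t;

/* Owner-side definition; the consumer never includes this. */
typedef struct {
	demo_bin_t bins_small[DEMO_NBINS_SMALL];
	/* ... other hot fields ... */
	demo_bin_t bins_large[DEMO_NBINS_LARGE];
	demo_bin_array_descriptor_t descriptor;
} demo_tcache_t;

/* Consumer-side aggregation, written purely against the descriptor. */
static uint64_t
demo_merge_stats(const demo_bin_array_descriptor_t *desc) {
	uint64_t total = 0;
	for (int i = 0; i < DEMO_NBINS_SMALL; i++) {
		total += desc->bins_small[i].nrequests;
	}
	for (int i = 0; i < DEMO_NBINS_LARGE; i++) {
		total += desc->bins_large[i].nrequests;
	}
	return total;
}

int
main(void) {
	demo_tcache_t tcache = {0};
	tcache.descriptor.bins_small = tcache.bins_small;
	tcache.descriptor.bins_large = tcache.bins_large;
	tcache.bins_small[1].nrequests = 3;
	tcache.bins_large[0].nrequests = 2;
	printf("total nrequests = %llu\n",
	    (unsigned long long)demo_merge_stats(&tcache.descriptor));
	return 0;
}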
diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h
index 1155d62..e49bc9d 100644
--- a/include/jemalloc/internal/tcache_types.h
+++ b/include/jemalloc/internal/tcache_types.h
@@ -3,14 +3,9 @@
#include "jemalloc/internal/size_classes.h"
-typedef struct tcache_bin_info_s tcache_bin_info_t;
-typedef struct tcache_bin_s tcache_bin_t;
typedef struct tcache_s tcache_t;
typedef struct tcaches_s tcaches_t;
-/* ncached is cast to this type for comparison. */
-typedef int32_t low_water_t;
-
/*
* tcache pointers close to NULL are used to encode state information that is
* used for two purposes: preventing thread caching on a per thread basis and
diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h
index 155a2ec..0b9841a 100644
--- a/include/jemalloc/internal/tsd.h
+++ b/include/jemalloc/internal/tsd.h
@@ -65,6 +65,7 @@ typedef void (*test_callback_t)(int *);
O(arenas_tdata_bypass, bool, bool) \
O(reentrancy_level, int8_t, int8_t) \
O(narenas_tdata, uint32_t, uint32_t) \
+ O(offset_state, uint64_t, uint64_t) \
O(thread_allocated, uint64_t, uint64_t) \
O(thread_deallocated, uint64_t, uint64_t) \
O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \
@@ -84,6 +85,7 @@ typedef void (*test_callback_t)(int *);
0, \
0, \
0, \
+ 0, \
NULL, \
RTREE_CTX_ZERO_INITIALIZER, \
NULL, \
diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h
index 33be666..7ace8ae 100644
--- a/include/jemalloc/internal/witness.h
+++ b/include/jemalloc/internal/witness.h
@@ -51,7 +51,7 @@
#define WITNESS_RANK_ARENA_LARGE 19U
#define WITNESS_RANK_LEAF 0xffffffffU
-#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF
+#define WITNESS_RANK_BIN WITNESS_RANK_LEAF
#define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF
#define WITNESS_RANK_DSS WITNESS_RANK_LEAF
#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF
diff --git a/include/jemalloc/jemalloc_mangle.sh b/include/jemalloc/jemalloc_mangle.sh
index df328b7..c675bb4 100755
--- a/include/jemalloc/jemalloc_mangle.sh
+++ b/include/jemalloc/jemalloc_mangle.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/sh -eu
public_symbols_txt=$1
symbol_prefix=$2